diff --git a/kitchen_counter/scriptAudit/mnf_script_audit.py b/kitchen_counter/scriptAudit/mnf_script_audit.py index 42f91cb..f132e2c 100755 --- a/kitchen_counter/scriptAudit/mnf_script_audit.py +++ b/kitchen_counter/scriptAudit/mnf_script_audit.py @@ -14,15 +14,23 @@ from io import BytesIO import datetime import pytz import subprocess +import warnings + + + + +warnings.simplefilter(action='ignore', category=FutureWarning) +pd.options.mode.copy_on_write = False +pd.options.mode.chained_assignment = None +warnings.filterwarnings("ignore", category=DeprecationWarning) +warnings.filterwarnings("ignore", category=RuntimeWarning) -# from django_q.tasks import async_task -# from django_q.brokers import Broker class NeutralAudit: def __init__( self, script_id: str = None, - log: bool = True, + log: bool = False, ) -> None: """ To Audit a Script already uploded. @@ -42,47 +50,35 @@ class NeutralAudit: Return : None """ + # pd.options.mode.copy_on_write = False + # pd.options.mode.chained_assignment = None + self.start_time_count = time.time() print("<<<<<<<<<<<<<<<<<<<<<<<<<") self.matrices_path = str(Path(__file__).resolve().parent) + "/matrices/" - self.total_time_file = str(Path(__file__).resolve().parent) - print(script_id,"SCRIPT-ID IS HERE|| AYYA") self.script_id = script_id - audit_root_dir = ( - str(Path(__file__).resolve().parent.parent) + "/media/audit_folder/" - ) - + audit_root_dir = (str(Path(__file__).resolve().parent.parent) + "/media/audit_folder/") self.script_name = str(self.script_id) - # self.total_line_before_audit = 1 output_dir = os.path.join(audit_root_dir, self.script_name) t_time_file = self.total_time_file + "/tail_errors.txt" - # with open(t_time_file, "a") as file008: - # file008.write(str(self.start_time_count)) - file_to_audit = File.objects.get( script=script_id, type="script-original", ) self.input_script = file_to_audit.file.path - if not os.path.exists(output_dir): try: - os.mkdir(output_dir) + os.makedirs(output_dir,exist_ok=True) except Exception as exp: print(repr(exp)) subprocess.run(["mkdir", output_dir]) subprocess.run(["chmod", "777", output_dir]) - ##print(output_dir) self.base_file_path = str(output_dir) + "/" - - self.csv_removed_space_between_words = ( - self.base_file_path + "space_between_words_removed.csv" - ) - + self.csv_removed_space_between_words = (self.base_file_path + "space_between_words_removed.csv") self.audit_report_csv = self.base_file_path + "audit_spreadsheet.csv" - + sys.stdout = open(os.devnull, "w") if log: log_file = self.base_file_path + "_log.txt" @@ -91,15 +87,7 @@ class NeutralAudit: else: self.gen_int_files = False sys.stdout = sys.__stdout__ - - - - self.audit_model_obj = ScriptAuditModel.objects.get( - script = Script.objects.get( - id = self.script_id, - ) - ) - + self.audit_model_obj = ScriptAuditModel.objects.get(script = Script.objects.get(id = self.script_id,)) time_file = self.base_file_path + "time_taken.txt" start_time = datetime.datetime.now() print(start_time) @@ -1129,6 +1117,11 @@ class NeutralAudit: #if self.gen_int_files: df.to_csv(csv_after_gen_and_sort_weights, index = False) ## + + ## remove some columns + df.drop(['first_largest', 'second_largest','third_largest','fourth_largest','fifth_largest','sixth_largest','seventh_largest','eight_largest','ninth_largest','tenth_largest','eleventh_largest','twelth_largest','thirteenth_largest','fourteenth_largest','fifteenth_largest','sixteenth_largest','seventeenth_largest','eighteenth_largest','ninteenth_largest','tewenty_largest','tone_largest','ttwo_largest','tthree_largest','tfour_largest','tfive_largest','tsix_largest','tseven_largest','teight_largest'], axis=1, inplace=True) + + sf.prep_for_pos_elimination(df) with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.prep_for_pos_elimination 5 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") @@ -2200,6 +2193,7 @@ class NeutralAudit: # audit_report_buffer = sf.print_audit_report_tabular_docx(audit_df,line_count_before_audit,line_count_after_audit) #commented on 13-09-23 para_filetered_audut_df = sf.assign_para_no(audit_df) + print("after para assign") with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER para_filetered_audut_df\n") @@ -2256,15 +2250,21 @@ class NeutralAudit: except: print("page number and language insertion failed") pass + auditdf_before_table_creation = self.base_file_path + "auditbefore_table.csv" + para_filetered_audut_df.to_csv(auditdf_before_table_creation) + print("B4 audit report buffer") + audit_report_buffer = sf.print_audit_report_tabular_docx(para_filetered_audut_df,scriptname,author,pre_audit_pagenumber,postauditpagenumber,preaudit_line_no,postaudit_line_no,script_language,dialogue_language) + + print("after audit buffer") with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") - # audit_report_path = self.base_file_path + "audit_report_doc.docx" - # report_data = Document(audit_report_buffer) - # report_data.save(audit_report_path) - - req_file = ContentFile(audit_report_buffer.read(), audit_report_name) - + print("going into models :- ") + try: + req_file = ContentFile(audit_report_buffer.read(), audit_report_name) + except Exception as exp: + print(repr(exp)) + print("repoo") with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("AFTER sf.print_audit_report_tabular_docx 87 : AFTER ASSIGNING LOCATIONS AUDIT :audit\n"+str(req_file)) @@ -2318,6 +2318,38 @@ class NeutralAudit: def script_meta(self): pass + def audit_ai_gen_script(self,lang: str = None): + + df, _ = self.before_audit(lang) + para_df = pd.DataFrame() + + df = sf_eng.ai_gen_script_to_audited_df(df) + + para_df = sf.merge_line_to_para(df) + + try: + para_df.to_csv(self.base_file_path+ "after_merge_line_para.csv", index = False) + print("para_df is written") + except: + pass + + audited_file_name = self.script_name + ".csv" + + req_file = ContentFile( + (para_df.to_csv(index=False, path_or_buf=None)).encode("utf-8"), + audited_file_name, + ) + + ## for local - uncomment + print("\n\n the code is here\n\n") + File.objects.create( + script=Script.objects.get(id=self.script_id), + type="script-csv", + file=req_file, + ) + print("\n\n @@@@#$$$$$$$$ csv saved from s2s\n\n") + + def audit_fdx(self): # fdx to audited csv @@ -2461,7 +2493,7 @@ class NeutralAudit: with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("\nafter quick audit : AFTER ASSIGNING LOCATIONS AUDIT :audit\n") - print(quick_audit_flag) + # print(quick_audit_flag) def get_character_list(self, lang: str = None): @@ -2546,7 +2578,7 @@ class NeutralAudit: output_dir = os.path.join(self.base_file_path, foldername) if not os.path.exists(output_dir): - os.mkdir(output_dir) + os.makedirs(output_dir, exist_ok=True) df_after_audit = pd.read_csv(audited_linewise_csv) df_after_audit.fillna("", inplace=True) @@ -2679,7 +2711,8 @@ class NeutralAudit: data = df_after_audit["data"][index] try: - print(data) + # print(data) + pass except: pass fout.writelines(str(data)) @@ -2700,7 +2733,7 @@ class NeutralAudit: return character_scripts_dict def audit_in_background(self): - + # # commenting os.fork to make code run in foreground # if os.fork() != 0: # return @@ -2715,10 +2748,10 @@ class NeutralAudit: self.audit_model_obj.status = States.SUCCESS self.audit_model_obj.save() print("Audit Success!!!!!!!!!!!!!!!!!!!!!!!") - end_time = datetime.datetime.now() + # end_time = datetime.datetime.now() with open(self.base_file_path + "time_taken.txt", "a") as file007: file007.write("\n\n****AUDITING IS SUCCESSFUL****\n") - print(end_time) + # print(end_time) except Exception as exp: @@ -2726,7 +2759,8 @@ class NeutralAudit: self.audit_model_obj.results = exp self.audit_model_obj.error_msg = "FAILED" self.audit_model_obj.save() - print(end_time) + # print(end_time) + if __name__ == "__main__": naudit = NeutralAudit("123", True) diff --git a/kitchen_counter/scriptAudit/sa_functions.py b/kitchen_counter/scriptAudit/sa_functions.py index ea9a66e..2ed602c 100755 --- a/kitchen_counter/scriptAudit/sa_functions.py +++ b/kitchen_counter/scriptAudit/sa_functions.py @@ -31,6 +31,14 @@ from conversion.translation.detection import script_det, language_detector from conversion.translation.translation_variables import get_language_script_code, language_code from conversion.translation.translation_variables import code_2_language as languages, language_2_code import PyPDF2 +import warnings + +# warnings.simplefilter(action='ignore', category=FutureWarning) +# pd.options.mode.chained_assignment = None +# warnings.simplefilter(action="ignore", category=DeprecationWarning) +# warnings.simplefilter(action="ignore", category=Warning) + + #mypath= str(Path(__file__).resolve().parent.parent) + "/neutralAudit/matrices/" mypath= str(Path(__file__).resolve().parent) + "/matrices/" # mypath = os.getcwd() +'\\' @@ -107,12 +115,12 @@ def conv_pdf_to_docx(input_script,output_converted_docx): start_time = time.time() print("conv_pdf_to_docx Start time ", start_time) parse(input_script,output_converted_docx,start=0,end=None) - + end_time = time.time() print("conv_pdf_to_docx End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken conv_pdf_to_docx: ", time_taken) def conv_docx_to_txt(input_script,output_converted_txt): # import textwrap @@ -136,10 +144,10 @@ def conv_docx_to_txt(input_script,output_converted_txt): # print(section.left_margin.inches,section.right_margin.inches) margins_inches = section.left_margin.inches + section.right_margin.inches #margins = int((section.left_margin.inches + section.right_margin.inches)*10) - print(margins_inches) + # print(margins_inches) canvas_width_inches = section_width_inches - margins_inches canvas_width = int(canvas_width_inches *10) - print("canvas width",canvas_width) + # print("canvas width",canvas_width) left_margin = int(section.left_margin.inches * 10) except: section = None @@ -170,7 +178,7 @@ def conv_docx_to_txt(input_script,output_converted_txt): - print(section.left_margin.inches) + # print(section.left_margin.inches) except: n =-1 section,canvas_width,left_margin = recalculate_section_properties(n) @@ -179,8 +187,8 @@ def conv_docx_to_txt(input_script,output_converted_txt): first = all_paras[0].paragraph_format #print(first.left_indent) #count = 1 - print("number of paras",len(all_paras)) - + # print("number of paras",len(all_paras)) + #left_margin = 15 @@ -202,12 +210,12 @@ def conv_docx_to_txt(input_script,output_converted_txt): space_after = 0.0 print("space before") - print(space_before) + # print(space_before) print("space after") - print(space_after) + # print(space_after) try: - print("line spacing ",paragraph_format.line_spacing.pt) - print("line spacing rule ",paragraph_format.line_spacing_rule) + # print("line spacing ",paragraph_format.line_spacing.pt) + # print("line spacing rule ",paragraph_format.line_spacing_rule) if paragraph_format.line_spacing.pt < 5 and previous_indent > 20: continue #print("space before",paragraph_format.space_before.pt) @@ -223,7 +231,7 @@ def conv_docx_to_txt(input_script,output_converted_txt): section_changed = True print("checking for continued at section change") text = para.text.split(' ') - print(text) + # print(text) if len(text) == 1: skip_words = ['CONT','CONTD','CONTINUED',"CONT'D"] ## to be replaced by regex ,match @@ -239,7 +247,7 @@ def conv_docx_to_txt(input_script,output_converted_txt): n= n+1 try: section,canvas_width,left_margin = recalculate_section_properties(n) - print(section.left_margin.inches) + # print(section.left_margin.inches) except Exception as e: print(e) continue @@ -318,8 +326,8 @@ def conv_docx_to_txt(input_script,output_converted_txt): line = line.strip() if line: #print(line) - print(fli,li,indent,ri) - print(para.alignment) + # print(fli,li,indent,ri) + # print(para.alignment) try: width = int(canvas_width - (indent + ri*10)) except: @@ -384,7 +392,7 @@ def conv_docx_to_txt(input_script,output_converted_txt): n = n +1 print("Section changed") section,canvas_width,left_margin = recalculate_section_properties(n) - print(section.left_margin.inches) + # print(section.left_margin.inches) if space_after >5.0: @@ -394,11 +402,11 @@ def conv_docx_to_txt(input_script,output_converted_txt): print("\n") previous_indent = indent + left_margin - end_time = time.time() + end_time = time.time() print("conv_docx_to_txt End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: conv_docx_to_txt", time_taken) def conv_pdf_to_txt(input_script,output_converted_txt): @@ -406,21 +414,21 @@ def conv_pdf_to_txt(input_script,output_converted_txt): start_time = time.time() print("conv_pdf_to_txt Start time ", start_time) reader = PdfReader(input_script) - print(len(reader.pages)) + print(len(reader.pages)) end_page = len(reader.pages) for sheet in range(end_page): page = reader.pages[sheet] text_data = (page.extract_text(extraction_mode="layout")) with open(output_converted_txt, "a", encoding="utf8") as out_file: out_file.write(text_data) - print("conv_pdf_to_txt End time ", time.time()) - end_time = time.time() + print("conv_pdf_to_txt End time ", time.time()) + end_time = time.time() print("conv_pdf_to_txt End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) - - + print("Time taken: conv_pdf_to_txt", time_taken) + + # def conv_pdf_to_txt(input_script,output_converted_txt): # # Load your PDF @@ -480,7 +488,7 @@ def conv_pdf_to_txt_java(input_script,output_converted_txt): def conv_to_txt(input_script, output_converted_docx, output_converted_txt): - print("conv_to_txt Start time ", time.time()) + start_time = time.time() extention = input_script.rsplit(".", 1)[-1] if extention == "txt": @@ -506,11 +514,13 @@ def conv_to_txt(input_script, output_converted_docx, output_converted_txt): else: raise ScriptAuditException(f"{extention} file is not supported for Audit!") - - print("conv_to_txt End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken conv_to_txt: ", time_taken) + print("conv_to_txt End time conv_to_txt", time.time()) def conv_to_df(txt_script) : - print("conv_to_df Start time ", time.time()) + start_time = time.time() script_data = open(txt_script, 'r', encoding="utf-8").read() script_data = script_data.split("\n") @@ -532,13 +542,17 @@ def conv_to_df(txt_script) : print("processing line",line_no) #print(data) df.loc[len(df.index)] = [str(line_no),data,'','No'] - print("conv_to_df End time ", time.time()) + # print("conv_to_df End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken conv_to_df: ", time_taken) return df def conv_to_csv(txt_script,csv_for_processing) : #print(csv_for_processing) - print("conv_to_csv start time ", time.time()) + # print("conv_to_csv start time ", time.time()) + start_time = time.time() import csv script_data = open(txt_script, 'r', encoding="utf-8").read() @@ -575,15 +589,19 @@ def conv_to_csv(txt_script,csv_for_processing) : # writing the data rows csvwriter.writerow([str(line_no),data,'','No']) - print("conv_to_csv End time ", time.time()) + # print("conv_to_csv End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken conv_to_csv: ", time_taken) def pre_assign_wts(df): - print("pre_assign_wts Start time ", time.time()) + # print("pre_assign_wts Start time ", time.time()) + start_time = time.time() skip_words = ['INT.','EXT.','I/E','E/I','CUT TO','CUT BACK TO','FLASHCUT TO','DISSOLVE TO', 'INTERCUT', 'INTER CUT','PBS', 'INTERVAL', 'FLASHBACK','FADE IN','FADE TO BLACK','ON THE SCREEN','ON THE TV','MORNING','AT HOTEL','TV','MONTAGES','MUSICAL MONTAGES','ESSENTIALS','LATER','ESSENTIAL'] pos_sp_dial_line_nos = df.loc[(df['data'].str.strip().str.contains(r':-|:|-|".*"') == True) & (df['data'].str.strip().str.contains('|'.join(skip_words)) == False) ,'line_no'].to_list() - print(pos_sp_dial_line_nos) + # print(pos_sp_dial_line_nos) new_pos_sp_dial_line_nos =pos_sp_dial_line_nos for index in df.loc[df['line_no'].isin(pos_sp_dial_line_nos),:].index: data = df['data'][index] @@ -596,11 +614,8 @@ def pre_assign_wts(df): pos_dia = '' pos_sp_par = '' - print(df.dtypes) - try: - print(data) - except: - pass + # print(df.dtypes) + if ":-" in data: pos_sp_par = data.split(":-")[0] @@ -626,9 +641,9 @@ def pre_assign_wts(df): pos_sp = pos_sp_par pos_par = '' - print(pos_sp) - print(pos_par) - print(pos_dia) + # print(pos_sp) + # print(pos_par) + # print(pos_dia) if pos_sp: has_digit = any(chr.isdigit() for chr in pos_sp) @@ -645,7 +660,7 @@ def pre_assign_wts(df): df.loc[index + 0.3,'preassigned_weights'] = 'ps10-20' new_pos_sp_dial_line_nos.append(new_line_no) - print("split pos_par",df.loc[index + 0.3,'line_no']) + # print("split pos_par",df.loc[index + 0.3,'line_no']) if pos_dia: print("1",df.dtypes) df.loc[index + 0.6] = np.nan @@ -664,14 +679,18 @@ def pre_assign_wts(df): df = df.sort_index().reset_index(drop=True) for index in df.index: df['line_no'][index] = float(index + 1) - print("pre_assign_wts End time ", time.time()) + # print("pre_assign_wts End time pre_assign_wts", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken pre_assign_wts: ", time_taken) return df def create_audit_df(df): - print("create_audit_df Start time ", time.time()) + # print("create_audit_df Start time ", time.time()) + start_time = time.time() audit_df = df[['line_no','data']] audit_df['Identification_Status'] = '' audit_df['data_corrected'] = '' @@ -699,11 +718,14 @@ def create_audit_df(df): audit_df.set_index('line_no',inplace=True) print("create_audit_df End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : create_audit_df", time_taken) return audit_df def trim_intro(df,audit_df): - print("trim_intro Start time ", time.time()) - + # print("trim_intro Start time ", time.time()) + start_time = time.time() stopwords = ['FADE IN' ] remove_upto = -1 intro_removed = False @@ -731,10 +753,14 @@ def trim_intro(df,audit_df): break if intro_removed: break - print("trim_intro Start time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken trim_intro: ", time_taken) + # print("trim_intro Start time ", time.time()) def remove_page_numbers(df,audit_df): - print("remove_page_numbers Start time ", time.time()) + # print("remove_page_numbers Start time ", time.time()) + start_time = time.time() page_no_found = False for index in df.index: data = df['data'][index] @@ -754,11 +780,14 @@ def remove_page_numbers(df,audit_df): audit_df['line_removed'][line_no] = 'Yes' audit_df['page_no'][line_no] = 'Yes' - - print("remove_page_numbers End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken :remove_page_numbers ", time_taken) + # print("remove_page_numbers End time ", time.time()) def get_per_uppercase(text): - print("get_per_uppercase Start time ", time.time()) + # print("get_per_uppercase Start time ", time.time()) + # start_time = time.time() count_upper = 0 for ch in text.strip(): if ch.isupper(): @@ -770,7 +799,8 @@ def get_per_uppercase(text): print("get_per_uppercase End time ", time.time()) return 0 def prep_for_audit(df): - print("prep_for_audit Start time ", time.time()) + # print("prep_for_audit Start time ", time.time()) + start_time = time.time() df.reset_index(inplace=True, drop=True) import re print("Entering prep_for_audit") @@ -916,7 +946,7 @@ def prep_for_audit(df): print(str(df['line_no'])) print("692",df['line_no'][i]) - print(nnbl_line_no) + # print(nnbl_line_no) print("694") df['pnbl_line_no'][index] = pnbl_line_no df['nnbl_line_no'][index] = nnbl_line_no @@ -980,7 +1010,7 @@ def prep_for_audit(df): # ndil while ndil_index <= df.index[-1]: ndil_indent = df['ssc'][ndil_index] - print(cur_indent,ndil_indent) + # print(cur_indent,ndil_indent) if df['Identification_Status'][ndil_index] != 'blank' and ndil_indent != cur_indent: ndil_line_no = df['line_no'][ndil_index] break @@ -988,30 +1018,37 @@ def prep_for_audit(df): ndil_index += 1 df['ndil_line_no'][index] = ndil_line_no - print("prep_for_audit End time ", time.time()) + # print("prep_for_audit End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : prep_for_audit ", time_taken) return df def remove_extra_blank_lines(df,audit_df): # remove two or more consequtive blank lines.. keep one - print("remove_extra_blank_lines Start time ", time.time()) + # print("remove_extra_blank_lines Start time ", time.time()) + start_time = time.time() for index in range(0,df.index[-1]): data = df['data'][index] line_no = df['line_no'][index] nl_data = df['data'][index+1] - try: - print(data) - except: - pass + if not data.strip() and not nl_data.strip(): audit_df['line_removed'][line_no] = 'Yes' audit_df['Identification_Status'][line_no] = 'blank' elif not data.strip() and nl_data.strip(): df['plb'][index] = 'N' - print("remove_extra_blank_lines End time ", time.time()) + # print("remove_extra_blank_lines End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : remove_extra_blank_lines", time_taken) + + def remove_blank_line_after_parenthetical(df,audit_df): # remove two or more consequtive blank lines.. keep one - print("remove_blank_line_after_parenthetical Start time ", time.time()) + # print("remove_blank_line_after_parenthetical Start time ", time.time()) + start_time = time.time() for index in range(0,df.index[-1]): data = df['data'][index] line_no = df['line_no'][index] @@ -1025,11 +1062,15 @@ def remove_blank_line_after_parenthetical(df,audit_df): if df['parenthetical'][index] in ('Complete','EndingRight') and not nl_data.strip(): audit_df['line_removed'][nl_line_no] = 'Yes' df['nlb'][index] = 'N' - print("remove_blank_line_after_parenthetical End time ", time.time()) + # print("remove_blank_line_after_parenthetical End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken :remove_blank_line_after_parenthetical ", time_taken) def merge_broken_lines(df,audit_df): - print("merge_broken_lines Start time ", time.time()) + # print("merge_broken_lines Start time ", time.time()) + start_time = time.time() index_iter = iter(range(0,df.index[-1])) for index in index_iter: @@ -1071,8 +1112,8 @@ def merge_broken_lines(df,audit_df): print(index,line_no,cur_line_indent,next_nbl_indent,two_line_len) - print(cur_line_data) - print(next_nbl_data) + # print(cur_line_data) + # print(next_nbl_data) if two_line_len < 150: @@ -1083,9 +1124,9 @@ def merge_broken_lines(df,audit_df): df['case'][index] = case # lcp = get_last_char_pos(two_line_data) # df['last_character_placement'][index] = lcp - print(line_no) + # print(line_no) audit_df['line_merged_with_next_line'][line_no] = 'Yes' - print(two_line_data) + # print(two_line_data) audit_df['line_removed'][nnbl_line_no] = 'Yes' # try: @@ -1107,20 +1148,26 @@ def merge_broken_lines(df,audit_df): else: - print(cur_line_data) + # print(cur_line_data) + pass else: - print(index,cur_line_indent,next_nbl_indent) + # print(index,cur_line_indent,next_nbl_indent) try: - print(cur_line_data) + # print(cur_line_data) + pass except: pass - print("merge_broken_lines End time ", time.time()) + # print("merge_broken_lines End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : merge_broken_lines ", time_taken) #newfile.write(cur_line_data) def remove_space_between_words(df,audit_df): - print("remove_space_between_words Start time ", time.time()) + # print("remove_space_between_words Start time ", time.time()) + start_time = time.time() lines_removed = audit_df.loc[audit_df['line_removed'] == 'Yes'].index.to_list() # remove extra spaces between the words for index in df.index: @@ -1141,19 +1188,23 @@ def remove_space_between_words(df,audit_df): if new_data.strip() != data.strip(): audit_df['space_removed_between_characters'][line_no] = 'Yes' - print(index) - try: - print(data) - print(new_data) - except: - pass - print("remove_space_between_words End time ", time.time()) + # print(index) + # try: + # print(data) + # print(new_data) + # except: + # pass + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : remove_space_between_words", time_taken) + # print("remove_space_between_words End time ", time.time()) #df = df.loc[df['line_removed'] != 'Yes',:] def get_strict_conditions(csv_strict_conditions): - print("get_strict_conditions Start time ", time.time()) + # print("get_strict_conditions Start time ", time.time()) + start_time = time.time() import pandas as pd conditions_df = pd.read_csv(csv_strict_conditions, index_col = [0], skiprows = [0]) @@ -1189,11 +1240,15 @@ def get_strict_conditions(csv_strict_conditions): 'pdil_plb','pdil_nlb','pdil_par','pdil_vs_cur_indent', 'ndil_plb','ndil_nlb','ndil_par','ndil_vs_cur_indent']] - print("get_strict_conditions End time ", time.time()) + # print("get_strict_conditions End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : get_strict_conditions ", time_taken) return conditions_df def test_strict_conditions(df,csv_strict_conditions): - print("test_strict_conditions Start time ", time.time()) + # print("test_strict_conditions Start time ", time.time()) + start_time = time.time() import pandas as pd left_aligned = True @@ -1274,8 +1329,8 @@ def test_strict_conditions(df,csv_strict_conditions): pnbl_vs_cur_indent = "Less" #print(pnbl_index) - - + + try: @@ -1513,10 +1568,14 @@ def test_strict_conditions(df,csv_strict_conditions): df.loc[df['Identification_Status'] == 'blank','isIdentified'] = 'Yes' - print("test_strict_conditions ENd time ", time.time()) + # print("test_strict_conditions ENd time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : test_strict_conditions", time_taken) def prep_weights_csv (weights_csv) : - print("prep_weights_csv Start time ", time.time()) + # print("prep_weights_csv Start time ", time.time()) + start_time = time.time() wts_df = pd.read_csv(weights_csv,skiprows=[0]) wts_df = wts_df.head(50) wts_df.rename(columns={wts_df.columns[1]:'Possibilities',wts_df.columns[2]:'Description', @@ -1585,15 +1644,23 @@ def prep_weights_csv (weights_csv) : wts_df = wts_df.merge(sub, how ='inner', on = ['Possibilities'], suffixes=('','_y')) wts_df.set_index('Possibilities',inplace =True) - print("prep_weights_csv End time ", time.time()) + # print("prep_weights_csv End time ", time.time()) + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : prep_weights_csv", time_taken) return wts_df def give_largest(df, n): - print("give_largest Start time ", time.time()) + # print("give_largest Start time ", time.time()) + start_time = time.time() largest = df.nlargest(n) data = [x for x in largest] index = [f'{i}_largest' for i in range(1, len(largest)+1)] - print("give_largest ENd time ", time.time()) + # print("give_largest ENd time ", time.time()) + + end_time = time.time() + time_taken = end_time - start_time + print("Time taken : prep_weights_csv", time_taken) return pd.Series(data, index=index) @@ -1634,11 +1701,11 @@ def update_parenthetical_neighbor_wt(df): df['ps15'][index+2] += 15 except: pass - end_time = time.time() + end_time = time.time() print("update_parenthetical_neighbor_wt End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: update_parenthetical_neighbor_wt", time_taken) return df def gen_pos_weights(df,weights_csv): @@ -1737,10 +1804,10 @@ def gen_pos_weights(df,weights_csv): except: pass - print(plb) - print(nlb) - print(pnbl_indent) - print(nnbl_indent) + # print(plb) + # print(nlb) + # print(pnbl_indent) + # print(nnbl_indent) ### wights to be assigned based on space count, case, parentheseis and plb/nlb @@ -1862,7 +1929,7 @@ def gen_pos_weights(df,weights_csv): sp_words1 = ['cut to','CUT BACK TO','FLASHCUT TO','dissolve to', 'intercut', 'Inter Cut','PBS', 'interval', 'Flashback','FADE IN','FADE TO BLACK'] for sp_word in sp_words1: - print(sp_word) + # print(sp_word) search_data = data.replace(":","") match = re.match(sp_word,search_data.strip(),re.IGNORECASE) if match: @@ -1876,7 +1943,7 @@ def gen_pos_weights(df,weights_csv): break - print (contains_special,search_data) + # print (contains_special,search_data) if not contains_special: search_data = data.strip() ## check if within quotes @@ -1901,7 +1968,7 @@ def gen_pos_weights(df,weights_csv): sp_words3 = ['INT.','EXT.','I/E','E/I','EXT-','INT-'] if not contains_special: for sp_word in sp_words3: - print(sp_word) + # print(sp_word) #search_data = data.replace(":","") found = re.search(sp_word,data.strip()[0:8]) if found: @@ -1958,11 +2025,11 @@ def gen_pos_weights(df,weights_csv): if 'actual_element' not in df.columns: df['actual_element'] = '' - end_time = time.time() + end_time = time.time() print("gen_pos_weights End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: gen_pos_weights", time_taken) return df def sort_pos_decr_wts(df): @@ -2007,7 +2074,7 @@ def sort_pos_decr_wts(df): Tops = pd.DataFrame(b.apply(lambda x: list(b.columns[np.array(x).argsort()[::-1][:28]]), axis=1).to_list(), columns=['Top1', 'Top2', 'Top3', 'Top4', 'Top5', 'Top6', 'Top7', 'Top8', 'Top9', 'Top10', 'Top11', 'Top12', 'Top13', 'Top14', 'Top15', 'Top16', 'Top17', 'Top18', 'Top19', 'Top20', 'Top21', 'Top22', 'Top23', 'Top24', 'Top25', 'Top26', 'Top27','Top28']) - print(Tops) + # print(Tops) res = pd.concat([df, Tops], axis=1) #print("Ye kuch result hai:",res) @@ -2170,11 +2237,11 @@ def sort_pos_decr_wts(df): df = res.reindex(columns=y) - end_time = time.time() + end_time = time.time() print("sort_pos_decr_wts End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: sort_pos_decr_wts", time_taken) return(df) @@ -2232,7 +2299,7 @@ def prep_for_pos_elimination(df): pos_without_weight.append(pos.split("-")[0]) line_pos_string = ';'.join([str(elem) for elem in pos_without_weight]) - print(line_pos_string) + # print(line_pos_string) df['Identification_Status'][index] = line_pos_string @@ -2282,14 +2349,14 @@ def prep_for_pos_elimination(df): for k in range(0,len(pos_with_wts)-1): wt1 = pos_with_wts[k].split("-")[1] wt2 = pos_with_wts[k+1].split("-")[1] - print(wt1,wt2) + # print(wt1,wt2) if wt2 == wt1: max_pos_index = k+1 continue else: break - print (max_pos_index) + # print (max_pos_index) for j in range(0,max_pos_index+1): pos_not_to_remove.append(df["Identification_Status"][index].split(";")[j]) @@ -2418,15 +2485,15 @@ def prep_for_pos_elimination(df): pos_without_weight.append(pos.split("-")[0]) line_pos_string = ';'.join([str(elem) for elem in pos_without_weight]) - print(line_pos_string) + # print(line_pos_string) df['Identification_Status'][index] = line_pos_string - end_time = time.time() + end_time = time.time() print("prep_for_pos_elimination End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: prep_for_pos_elimination", time_taken) @@ -2467,11 +2534,11 @@ def examine_speaker_pos(df,audit_df): try: if ("".join(data.split()).upper() in unique_speaker_list) or (data.upper() in unique_speaker_list): - print (line_no,data) + # print (line_no,data) if line_no not in speaker_lines_list and df['isIdentified'][index] != 'Yes': speaker_lines_list.append(line_no) except: - print(line_no,data,"data is not str") + # print(line_no,data,"data is not str") pass @@ -2485,20 +2552,20 @@ def examine_speaker_pos(df,audit_df): #print(unique_speaker_list) if two_line_data in unique_speaker_list: #print("Yes") - print(line_no,data) - print(line_no,nl_data) + # print(line_no,data) + # print(line_no,nl_data) speaker_in_two_lines_list.append(line_no) elif two_line_data.lstrip().split("(")[0] in unique_speaker_list and data.lstrip().split("(")[0].strip() not in unique_speaker_list : - print(line_no,data) - print(line_no,nl_data) + # print(line_no,data) + # print(line_no,nl_data) speaker_in_two_lines_list.append(line_no) speaker_lines_list.sort() speaker_in_two_lines_list.sort() - print(speaker_lines_list) - print(speaker_in_two_lines_list) + # print(speaker_lines_list) + # print(speaker_in_two_lines_list) # In[174]: @@ -2813,11 +2880,11 @@ def examine_speaker_pos(df,audit_df): df['isIdentified'][index] = 'Yes' else: df['isIdentified'][index] = 'No' - end_time = time.time() + end_time = time.time() print("examine_speaker_pos End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_speaker_pos", time_taken) return df def examine_speaker_next_lines(df,audit_df): @@ -3416,15 +3483,15 @@ def examine_speaker_next_lines(df,audit_df): df['isIdentified'][index] = 'Yes' else: df['isIdentified'][index] = 'No' - end_time = time.time() + end_time = time.time() print("examine_speaker_next_lines End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_speaker_next_lines", time_taken) return df def prep_pnnbl_wts(csv_pnbl_nnbl,cur_dir): - + start_time = time.time() print("prep_pnnbl_wts Start time ", start_time) pnbl_nnbl_df = pd.read_csv(csv_pnbl_nnbl,skiprows = [0]) @@ -3466,11 +3533,11 @@ def prep_pnnbl_wts(csv_pnbl_nnbl,cur_dir): pnbl_df.to_csv(os.path.join(cur_dir,'pnbl_weights.csv'),index = False) nnbl_df.to_csv(os.path.join(cur_dir,'nnbl_weights.csv'),index = False) - end_time = time.time() + end_time = time.time() print("prep_pnnbl_wts End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: prep_pnnbl_wts", time_taken) #return pnbl_df,nnbl_df @@ -3702,11 +3769,11 @@ def identify_using_pnbl_nnbl(df,identify_using,iteration): pos_decreased = True else: pos_decreased = False - end_time = time.time() + end_time = time.time() print("identify_using_pnbl_nnbl End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: identify_using_pnbl_nnbl", time_taken) return df,new_lines_identified,identify_using,count_lines_identified,line_nos_identified,pos_decreased @@ -3906,11 +3973,11 @@ def remove_ineligible_pos(df,identify_using,iteration): pos_decreased = False print(total_pos_before,total_pos) - end_time = time.time() + end_time = time.time() print("remove_ineligible_pos End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: remove_ineligible_pos", time_taken) return df,new_lines_identified,pos_decreased,count_lines_identified,total_pos @@ -4041,11 +4108,11 @@ def do_while_pnnbl_ineligible(df): print(total_pos_initial,total_pos_after) - end_time = time.time() + end_time = time.time() print("do_while_pnnbl_ineligible End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: do_while_pnnbl_ineligible ", time_taken) return df def examine_same_content_lines(df): @@ -4102,11 +4169,11 @@ def examine_same_content_lines(df): df['When_Identified'][index] = 'ExaminingSameContentLines' else: df['Identification_Status'][index] == 'ps8;ps25' - end_time = time.time() + end_time = time.time() print("examine_same_content_lines End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_same_content_lines", time_taken) return df def examine_action_possibilities_part1(df): @@ -4195,11 +4262,11 @@ def examine_action_possibilities_part1(df): print(cur_line_new_pos) print("\n") continue - end_time = time.time() + end_time = time.time() print("examine_action_possibilities_part1 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_action_possibilities_part1", time_taken) return df def examine_action_possibilities_part2(df): @@ -4285,11 +4352,11 @@ def examine_action_possibilities_part2(df): # df['Identification_Status'][index] = 'ps5' # df['When_Identified'][index] = 'ExaminingActionPossibilitiesAfterIneligible' # continue - end_time = time.time() + end_time = time.time() print("examine_action_possibilities_part2 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_action_possibilities_part2", time_taken) return df def examine_same_indent_bunch(df): @@ -4525,11 +4592,11 @@ def examine_same_indent_bunch(df): # In[ ]: print(total_pos_before,total_pos_after) - end_time = time.time() + end_time = time.time() print("examine_same_indent_bunch End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_same_indent_bunch", time_taken) return df def examine_relative_indent(df): @@ -4757,11 +4824,11 @@ def examine_relative_indent(df): # # In[ ]: # print(total_pos_before,total_pos_after) - end_time = time.time() + end_time = time.time() print("examine_relative_indent End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_relative_indent ", time_taken) return df def examine_pos_sp_indent(df,csv_removed_space_between_words,csv_pnnbl_ineligble_after_relative_indent): @@ -4804,11 +4871,11 @@ def examine_pos_sp_indent(df,csv_removed_space_between_words,csv_pnnbl_ineligble df['Identification_Status'][index] = 'ps7' df['isIdentified'][index] = 'Yes' df['When_Identified'][index] = 'ExaminingPossibleSpeakerIndent' - end_time = time.time() + end_time = time.time() print("examine_pos_sp_indent End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_pos_sp_indent", time_taken) return df @@ -4877,11 +4944,11 @@ def examine_action_middle_possibilities_using_pnnbl_top(df): # continue ## till here - end_time = time.time() + end_time = time.time() print("examine_action_middle_possibilities_using_pnnbl_top End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_action_middle_possibilities_using_pnnbl_top", time_taken) return df @@ -5192,11 +5259,11 @@ def examine_speaker_extension(df,audit_df): # audit_df.loc[audit_df['line_no'] == line, 'data_corrected'] = new_data # #print(audit_df.loc[audit_df['line_no'] == line, 'data_corrected']) - end_time = time.time() + end_time = time.time() print("examine_speaker_extension End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_speaker_extension", time_taken) return df @@ -5406,11 +5473,11 @@ def examine_action_using_top2_part1(df): # df['When_Identified'][index] = 'UsingTop2PNNBL' # print("identifying as ps6 using top2 pnbl") - end_time = time.time() + end_time = time.time() print("examine_action_using_top2_part1 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_action_using_top2_part1 ", time_taken) return df @@ -5519,11 +5586,11 @@ def refine_action_possibilties(df): line_new_pos.remove('ps7') df['Identification_Status'][index] = ";".join(line_new_pos) - end_time = time.time() + end_time = time.time() print("refine_action_possibilties End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: refine_action_possibilties", time_taken) return df @@ -5547,11 +5614,11 @@ def prep_pnnbl_eligible_csv(pnbl_eligibility_matrix,nnbl_eligibility_matrix): pnbl_eligible_df = pd.read_csv(os.path.join(cur_dir,'pnbl_eligible_pos.csv'), index_col = ['Possibilities']) nnbl_eligible_df = pd.read_csv(os.path.join(cur_dir,'nnbl_eligible_pos.csv'), index_col = ['Possibilities']) - end_time = time.time() + end_time = time.time() print("prep_pnnbl_eligible_csv End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: prep_pnnbl_eligible_csv ", time_taken) def check_eligibility_using_identified_pnnbl(df): start_time = time.time() @@ -5650,11 +5717,11 @@ def check_eligibility_using_identified_pnnbl(df): print(total_pos_before,total_pos_after) pos_decreased = True if total_pos_after < total_pos_before else False - end_time = time.time() + end_time = time.time() print("check_eligibility_using_identified_pnnbl End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: check_eligibility_using_identified_pnnbl ", time_taken) return df,pos_decreased,lines_identified @@ -5673,11 +5740,11 @@ def do_while_examine_using_identified_pnnbl(df): total_lines_identified += lines_identified print(iteration,total_lines_identified) print(iteration,total_lines_identified) - end_time = time.time() + end_time = time.time() print("do_while_examine_using_identified_pnnbl End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: do_while_examine_using_identified_pnnbl", time_taken) return df @@ -5896,11 +5963,11 @@ def start_top_identifications_part1(df): if cur_line_pos[0] == 'ps8' and extn_found: df['Identification_Status'][index] = 'ps8' df['When_Identified'][index] = 'StartIdentifyingTopsPart1' - end_time = time.time() + end_time = time.time() print("start_top_identifications_part1 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part1", time_taken) return df @@ -6028,8 +6095,8 @@ def start_top_identifications_part1_diluted(df): - ## if top is 1,6,7,16 identify them - if cur_line_pos[0] == 'ps1' or (cur_line_pos[0] =='ps6' and pnbl_par =='Absent' and "".join(nnbl_pos) != 'ps6' and nnbl_pos[0] != 'ps5') or cur_line_pos[0] == 'ps7' or (cur_line_pos[0] == 'ps16' and nnbl_pos[0] != 'ps15'): + ## if top is 1,6,7,16 identify them added ps8 aug10-24 + if cur_line_pos[0] == 'ps1' or (cur_line_pos[0] =='ps6' and pnbl_par =='Absent' and "".join(nnbl_pos) != 'ps6' and nnbl_pos[0] != 'ps5') or cur_line_pos[0] == 'ps7' or cur_line_pos[0] == 'ps8' or (cur_line_pos[0] == 'ps16' and nnbl_pos[0] != 'ps15'): try: print(line_no,data) except: @@ -6076,11 +6143,11 @@ def start_top_identifications_part1_diluted(df): df['Identification_Status'][index] = 'ps6' df['When_Identified'][index] = 'StartIdentifyingTopsDiluted' - end_time = time.time() + end_time = time.time() print("start_top_identifications_part1_diluted End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part1_diluted", time_taken) return df def examine_speaker_mix_part1(df,audit_df): @@ -6194,11 +6261,11 @@ def examine_speaker_mix_part1(df,audit_df): df = df.sort_index().reset_index(drop=True) continue - end_time = time.time() + end_time = time.time() print("examine_speaker_mix_part1 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_speaker_mix_part1", time_taken) return df # df.to_csv(p.output_file_path,index=False) @@ -6436,11 +6503,11 @@ def examine_speaker_mix_part2(df,audit_df): df = df.sort_index().reset_index(drop=True) continue - end_time = time.time() + end_time = time.time() print("examine_speaker_mix_part2 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_speaker_mix_part2", time_taken) return df # df.to_csv(p.output_file_path, index = False) @@ -6708,11 +6775,11 @@ def start_top_identifications_part2(df): pass df['Identification_Status'][index] = ";".join(line_new_pos) continue - end_time = time.time() + end_time = time.time() print("start_top_identifications_part2 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part2", time_taken) return df @@ -6831,11 +6898,11 @@ def start_slug_identification(df): - end_time = time.time() + end_time = time.time() print("start_slug_identification End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_slug_identification", time_taken) return df @@ -6997,11 +7064,11 @@ def start_top_identifications_part3(df): df['Identification_Status'][index] = 'ps1' df['When_Identified'][index] = 'StartIdentifyingTopsPart3' - end_time = time.time() + end_time = time.time() print("start_top_identifications_part3 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part3", time_taken) return df @@ -7203,11 +7270,11 @@ def start_top_identifications_part4(df): df['isIdentified'][index] == 'No' else: df['isIdentified'][index] == 'Yes' - end_time = time.time() + end_time = time.time() print("start_top_identifications_part4 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part4", time_taken) return df @@ -7441,11 +7508,11 @@ def start_top_identifications_part5(df): df['isIdentified'][index] == 'No' else: df['isIdentified'][index] == 'Yes' - end_time = time.time() + end_time = time.time() print("start_top_identifications_part5 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part5", time_taken) return df @@ -7629,11 +7696,11 @@ def start_top_identifications_part6(df): df['isIdentified'][index] == 'No' else: df['isIdentified'][index] == 'Yes' - end_time = time.time() + end_time = time.time() print("start_top_identifications_part6 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part6", time_taken) return df @@ -7831,11 +7898,11 @@ def start_top_identifications_part7(df): df['isIdentified'][index] == 'No' else: df['isIdentified'][index] == 'Yes' - end_time = time.time() + end_time = time.time() print("start_top_identifications_part7 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part7 ", time_taken) return df @@ -8029,11 +8096,11 @@ def start_top_identifications_part8(df): else: df['isIdentified'][index] == 'Yes' - end_time = time.time() + end_time = time.time() print("start_top_identifications_part8 End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: start_top_identifications_part8", time_taken) return df @@ -8185,11 +8252,11 @@ def decrease_wt_dial_between_action(df): df['isIdentified'][index] == 'No' else: df['isIdentified'][index] == 'Yes' - end_time = time.time() + end_time = time.time() print("decrease_wt_dial_between_action End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: decrease_wt_dial_between_action", time_taken) return df @@ -8509,11 +8576,11 @@ def examine_among_two(df): df['isIdentified'][index] == 'No' else: df['isIdentified'][index] == 'Yes' - end_time = time.time() + end_time = time.time() print("examine_among_two End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_among_two", time_taken) return df @@ -8655,11 +8722,11 @@ def examine_action_using_top2_wt_diff(df): print("identifying as ps5") df['Identification_Status'][index] = 'ps5' df['When_Identified'][index] = 'ExamineActionUsingTop2Wt' - end_time = time.time() + end_time = time.time() print("examine_action_using_top2_wt_diff End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: examine_action_using_top2_wt_diff", time_taken) return df @@ -8705,14 +8772,18 @@ def identify_top_as_final(df): df['Identification_Status'][index] = top2 continue + #if 13 is top and 15 is second but nlb =Y then identify as 15 + if (top1 == 'ps13' and top2 == 'ps15' and df['nlb'][index] == 'Y'): + df['Identification_Status'][index] = 'ps15' + continue df['Identification_Status'][index] = top1 # df['isIdentified'][index] = 'No' - end_time = time.time() + end_time = time.time() print("identify_top_as_final End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: identify_top_as_final", time_taken) return df @@ -9465,11 +9536,11 @@ def run_audit_on_identified_backup(df,audit_df): df = df.sort_index().reset_index(drop=True) #df = df.sort_values(by=['line_no']).reset_index(drop =True) - end_time = time.time() + end_time = time.time() print("run_audit_on_identified_backup End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: run_audit_on_identified_backup", time_taken) return df @@ -9494,13 +9565,13 @@ def run_audit_on_identified(df,audit_df = False): except: pass - end_time = time.time() - print("run_audit_on_identified End time ", end_time) - # print("conv_pdf_to_docx End time ", time.time()) - time_taken = end_time - start_time - print("Time taken: ", time_taken) - - + # end_time = time.time() + # print("run_audit_on_identified End time ", end_time) + # # print("conv_pdf_to_docx End time ", time.time()) + # time_taken = end_time - start_time + # print("Time taken: ", time_taken) + + def correct_left_indent(df,index,new_indent,audit_df= False): ## line_no = df['line_no'][index] @@ -10306,11 +10377,11 @@ def run_audit_on_identified(df,audit_df = False): df = df.sort_index().reset_index(drop=True) #df = df.sort_values(by=['line_no']).reset_index(drop =True) - end_time = time.time() + end_time = time.time() print("run_audit_on_identified End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: run_audit_on_identified", time_taken) try: if not audit_df.empty: return df,audit_df @@ -10569,11 +10640,11 @@ def merge_line_to_para(df): para_df['script_element'][para_no] = 'dialogue' para_df['scene_no'][para_no] = scene_no continue - end_time = time.time() + end_time = time.time() print("merge_line_to_para End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: merge_line_to_para", time_taken) return para_df @@ -11038,11 +11109,11 @@ def wrap_text(df,audit_df): df = df.sort_values(by=['line_no']).reset_index(drop =True) print("The df in merge_text123456789") print(df) - end_time = time.time() + end_time = time.time() print("wrap_text End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: wrap_text ", time_taken) return df @@ -11064,11 +11135,11 @@ def check_slug_still_unidentified(df): if ps in ['ps1','ps2','ps18']: slug_still_unidentified = True return slug_still_unidentified - end_time = time.time() + end_time = time.time() print("check_slug_still_unidentified End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: check_slug_still_unidentified", time_taken) return slug_still_unidentified @@ -11138,11 +11209,11 @@ def sa_wrapped_output_to_docx(para_df,output_docx): output_doc.save(output_docx) - end_time = time.time() + end_time = time.time() print("sa_wrapped_output_to_docx End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: sa_wrapped_output_to_docx", time_taken) @@ -11321,11 +11392,11 @@ def sa_output_to_docx(df,output_docx,output_template): output_doc.save(output_docx) - end_time = time.time() + end_time = time.time() print("sa_output_to_docx End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: sa_output_to_docx", time_taken) # def sa_txt_to_docx(script_txt,output_script_docx): @@ -11418,13 +11489,13 @@ def sa_output_to_txt(output_script_docx,output_script_txt): f.write(line) f.write('\n') - end_time = time.time() + end_time = time.time() print("sa_output_to_txt End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) - - + print("Time taken: sa_output_to_txt", time_taken) + + def print_audit_report_docx(audit_df,audit_report_docx): start_time = time.time() print("print_audit_report_docx Start time ", start_time) @@ -11552,11 +11623,11 @@ def print_audit_report_docx(audit_df,audit_report_docx): output_doc.save(audit_report_docx) - end_time = time.time() + end_time = time.time() print("print_audit_report_docx End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: print_audit_report_docx ", time_taken) def ps_to_script_element(ps): if ps == 'ps1': @@ -13184,12 +13255,12 @@ def convert_to_pdf(input_docx, out_folder): # convert_to_pdf(docfile, base_path_directory) # print("converted to pdf") # print("pdf_file_path",pdf_file_path) - + # pdf = PdfFileReader(open(pdf_file_path, "rb")) # print("after opening pdf") # pdfReader = PyPDF2.PdfReader(pdf) # totalPages1 = len(pdfReader.pages) - + # print("number of pages is:",totalPages1) # try: # return int(totalPages1) @@ -13200,13 +13271,13 @@ def countPages(docfile, pdf_file_path, base_path_directory): convert_to_pdf(docfile, base_path_directory) print("converted to pdf") print("pdf_file_path",pdf_file_path) - + def PdfCounter(pdf_file_path): pdf = open(pdf_file_path, "rb") print("after opening pdf") pdfReader = PyPDF2.PdfReader(pdf) totalPages1 = len(pdfReader.pages) - + print("number of pages is:",totalPages1) try: return int(totalPages1) @@ -13287,11 +13358,11 @@ def csv_to_docx(csv: pd.DataFrame) -> Document: content = "" para.text = content - end_time = time.time() + end_time = time.time() print("csv_to_docx End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: csv_to_docx ", time_taken) return output_doc def language_detector_for_csv(orginal_csv_path): @@ -13319,86 +13390,262 @@ def language_detector_for_csv(orginal_csv_path): Final_lang = [languages[src_lang]] # Final_lang = [language_code[src_lang]] dialogue_lang.append(Final_lang) - end_time = time.time() + end_time = time.time() print("language_detector_for_csv End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: language_detector_for_csv", time_taken) return actionline_lang, dialogue_lang +# def assign_para_no(df): +# start_time = time.time() +# print("assign_para_no Start time ", start_time) +# para_no = 1 + +# df['para_no'] = 0 +# count = 1 +# index_iter = iter(df.index) +# for index in df.index: +# line_pos = df['Identification_Status'][index] + +# if line_pos == 'blank' : +# continue + +# if line_pos == 'ps1': +# df.at[index, 'para_no'] = para_no +# para_no += 1 +# continue + +# if line_pos == "ps2": +# if index + 1 < len(df): +# if df['Identification_Status'][index + 1] == "ps3": +# df.at[index, 'para_no'] = para_no +# df.at[index+1, 'para_no'] = para_no +# para_no += 1 +# continue +# else: +# df.at[index, 'para_no'] = para_no +# para_no += 1 +# continue + +# if line_pos == 'ps4': +# if len(df) < index + 1: +# df.at[index, 'para_no'] = para_no +# spot_index = index +1 +# while df['Identification_Status'][spot_index] in ["ps5","ps6","ps4"]: +# df.at[spot_index, 'para_no'] = para_no +# spot_index += 1 +# para_no += 1 +# continue + + +# if line_pos == 'ps13': +# if index + 1 < len(df): +# df.at[index, 'para_no'] = para_no +# spot_index = index +1 +# while spot_index < len(df) and df['Identification_Status'][spot_index] in ["ps14","ps15","ps13","blank"]: +# if df['Identification_Status'][spot_index] == "blank": +# if spot_index + 1 < len(df) and df['Identification_Status'][spot_index+1] == "ps14": +# df.at[spot_index+1, 'para_no'] = para_no +# spot_index += 1 +# else: +# pass +# df.at[spot_index, 'para_no'] = para_no +# spot_index += 1 +# para_no += 1 +# continue + + +# if line_pos == 'ps6': +# if index + 1 < len(df): +# if df['Identification_Status'][index-1] in ["ps5","ps4"]: +# continue +# else: +# df.at[index, 'para_no'] = para_no +# para_no += 1 + +# if line_pos == "ps7": +# if index + 1 < len(df): +# df.at[index, 'para_no'] = para_no +# spot_index = index +1 +# while df['Identification_Status'][spot_index] in ["ps8","ps9"]: +# df.at[spot_index, 'para_no'] = para_no +# spot_index += 1 +# para_no += 1 +# continue + +# if line_pos == "ps8": +# if df['Identification_Status'][index+1] in ["ps13","ps15"]: +# df.at[index, 'para_no'] = para_no +# para_no += 1 +# continue +# else: +# df.at[index, 'para_no'] = para_no +# para_no += 1 +# continue + + +# if line_pos == 'ps15': +# if df['Identification_Status'][index-1] in ["ps7","ps12","ps10","ps20","ps8","blank"]: +# df.at[index, 'para_no'] = para_no +# para_no += 1 +# continue +# else: +# continue + +# if line_pos == "ps14": +# if df['Identification_Status'][index-1] in ["ps8","ps7"]: +# df.at[index, 'para_no'] = para_no +# spot_index = index +1 +# while df['Identification_Status'][spot_index] == "ps15": +# df.at[spot_index, 'para_no'] = para_no +# spot_index += 1 +# para_no += 1 +# else: +# continue + +# if line_pos == 'ps11': +# if index + 1 < len(df): +# df.at[index, 'para_no'] = para_no +# spot_index = index +1 +# while df['Identification_Status'][spot_index] in ["ps12","ps20"]: +# df.at[spot_index, 'para_no'] = para_no +# spot_index += 1 +# para_no += 1 +# continue + +# if line_pos == "ps12": +# if df['Identification_Status'][index-1] in ["ps11","ps20"]: +# continue +# continue + +# if line_pos == "ps10": +# df.at[index, 'para_no'] = para_no +# para_no += 1 +# continue + +# if line_pos == "ps20": +# if index + 1 < len(df): +# if df['Identification_Status'][index-1] == "ps11": +# continue +# elif df['Identification_Status'][index+1] == "ps12": +# df.at[index, 'para_no'] = para_no +# df.at[index+1, 'para_no'] = para_no +# para_no += 1 +# continue +# para_no += 1 +# continue + +# if line_pos == 'ps17' : +# df.at[index, 'para_no'] = para_no +# para_no += 1 +# continue + +# if line_pos == 'ps16' : +# df.at[index, 'para_no'] = para_no +# para_no += 1 +# continue +# count += 1 +# print(count) +# print(index) +# print("till here") +# columns = list(df.columns) +# columns.insert(3, columns.pop(columns.index('para_no'))) +# df = df[columns] +# end_time = time.time() +# print("assign_para_no End time ", end_time) +# # print("conv_pdf_to_docx End time ", time.time()) +# time_taken = end_time - start_time +# print("Time taken: assign_para_no", time_taken) +# return df + def assign_para_no(df): start_time = time.time() print("assign_para_no Start time ", start_time) para_no = 1 df['para_no'] = 0 - + count = 1 index_iter = iter(df.index) + for index in df.index: + line_pos = df['Identification_Status'][index] if line_pos == 'blank' : continue if line_pos == 'ps1': + print("ps1") df.at[index, 'para_no'] = para_no para_no += 1 continue if line_pos == "ps2": - if df['Identification_Status'][index + 1] == "ps3": - df.at[index, 'para_no'] = para_no - df.at[index+1, 'para_no'] = para_no - para_no += 1 - continue - else: - df.at[index, 'para_no'] = para_no - para_no += 1 - continue + print("ps2") + if index + 1 < len(df): + if df['Identification_Status'][index + 1] == "ps3": + df.at[index, 'para_no'] = para_no + df.at[index+1, 'para_no'] = para_no + para_no += 1 + continue + else: + df.at[index, 'para_no'] = para_no + para_no += 1 + continue if line_pos == 'ps4': - df.at[index, 'para_no'] = para_no - spot_index = index +1 - while df['Identification_Status'][spot_index] in ["ps5","ps6","ps4"]: - df.at[spot_index, 'para_no'] = para_no - spot_index += 1 - para_no += 1 - continue + print("ps4") + if len(df) <= index + 1: + df.at[index, 'para_no'] = para_no + spot_index = index +1 + while df['Identification_Status'][spot_index] in ["ps5","ps6","ps4"]: + df.at[spot_index, 'para_no'] = para_no + spot_index += 1 + para_no += 1 + continue if line_pos == 'ps13': - df.at[index, 'para_no'] = para_no - spot_index = index +1 - while spot_index < len(df) and df['Identification_Status'][spot_index] in ["ps14","ps15","ps13","blank"]: - if df['Identification_Status'][spot_index] == "blank": - if spot_index + 1 < len(df) and df['Identification_Status'][spot_index+1] == "ps14": - df.at[spot_index+1, 'para_no'] = para_no - spot_index += 1 - else: - pass - df.at[spot_index, 'para_no'] = para_no - spot_index += 1 - para_no += 1 - continue + print("ps13") + if index + 1 < len(df): + df.at[index, 'para_no'] = para_no + spot_index = index +1 + while spot_index < len(df) and df['Identification_Status'][spot_index] in ["ps14","ps15","ps13","blank"]: + if df['Identification_Status'][spot_index] == "blank": + if spot_index + 1 < len(df) and df['Identification_Status'][spot_index+1] == "ps14": + df.at[spot_index+1, 'para_no'] = para_no + spot_index += 1 + else: + pass + df.at[spot_index, 'para_no'] = para_no + spot_index += 1 + para_no += 1 + continue if line_pos == 'ps6': - if df['Identification_Status'][index-1] in ["ps5","ps4"]: - continue - else: - df.at[index, 'para_no'] = para_no - para_no += 1 + print("ps6") + if index + 1 < len(df): + if df['Identification_Status'][index-1] in ["ps5","ps4"]: + continue + else: + df.at[index, 'para_no'] = para_no + para_no += 1 if line_pos == "ps7": - df.at[index, 'para_no'] = para_no - spot_index = index +1 - while df['Identification_Status'][spot_index] in ["ps8","ps9"]: - df.at[spot_index, 'para_no'] = para_no - spot_index += 1 - para_no += 1 - continue + print("ps7") + if index + 1 < len(df): + df.at[index, 'para_no'] = para_no + spot_index = index +1 + while df['Identification_Status'][spot_index] in ["ps8","ps9"]: + df.at[spot_index, 'para_no'] = para_no + spot_index += 1 + para_no += 1 + continue if line_pos == "ps8": + print("ps8") if df['Identification_Status'][index+1] in ["ps13","ps15"]: df.at[index, 'para_no'] = para_no para_no += 1 @@ -13410,14 +13657,16 @@ def assign_para_no(df): if line_pos == 'ps15': - if df['Identification_Status'][index-1] in ["ps7","ps12","ps10","ps20","ps8","blank"]: - df.at[index, 'para_no'] = para_no - para_no += 1 - continue - else: - continue + print("ps15") + if df['Identification_Status'][index-1] in ["ps7","ps12","ps10","ps20","ps8","blank"]: + df.at[index, 'para_no'] = para_no + para_no += 1 + continue + else: + continue if line_pos == "ps14": + print("ps14-") if df['Identification_Status'][index-1] in ["ps8","ps7"]: df.at[index, 'para_no'] = para_no spot_index = index +1 @@ -13429,57 +13678,64 @@ def assign_para_no(df): continue if line_pos == 'ps11': - df.at[index, 'para_no'] = para_no - spot_index = index +1 - while df['Identification_Status'][spot_index] in ["ps12","ps20"]: - df.at[spot_index, 'para_no'] = para_no - spot_index += 1 - para_no += 1 - continue + print("ps11") + if index + 1 < len(df): + df.at[index, 'para_no'] = para_no + spot_index = index +1 + while df['Identification_Status'][spot_index] in ["ps12","ps20"]: + df.at[spot_index, 'para_no'] = para_no + spot_index += 1 + para_no += 1 + continue if line_pos == "ps12": + print("ps12") if df['Identification_Status'][index-1] in ["ps11","ps20"]: continue continue if line_pos == "ps10": + print("ps10") df.at[index, 'para_no'] = para_no para_no += 1 continue if line_pos == "ps20": - if df['Identification_Status'][index-1] == "ps11": - continue - elif df['Identification_Status'][index+1] == "ps12": - df.at[index, 'para_no'] = para_no - df.at[index+1, 'para_no'] = para_no + print("ps20") + if index + 1 < len(df): + if df['Identification_Status'][index-1] == "ps11": + continue + elif df['Identification_Status'][index+1] == "ps12": + df.at[index, 'para_no'] = para_no + df.at[index+1, 'para_no'] = para_no + para_no += 1 + continue para_no += 1 continue - para_no += 1 - continue if line_pos == 'ps17' : + print("ps17") df.at[index, 'para_no'] = para_no para_no += 1 continue if line_pos == 'ps16' : + print("ps16") df.at[index, 'para_no'] = para_no para_no += 1 continue - - + count += 1 + print("till here") columns = list(df.columns) columns.insert(3, columns.pop(columns.index('para_no'))) df = df[columns] - end_time = time.time() + end_time = time.time() print("assign_para_no End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: assign_para_no", time_taken) return df - def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenumber,postauditpagenumber,preaudit_line_no,postaudit_line_no,script_language,dialogue_language): start_time = time.time() print("print_audit_report_tabular_docx Start time ", start_time) @@ -13541,7 +13797,7 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum #total no of actionline total_actionlines = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])), :]) - print(total_actionlines) + print("totoal action line:",total_actionlines) # for Speaker @@ -14508,6 +14764,7 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum print("collection_audited_line_no", collection_audited_line_no) data_string = ', '.join(collection_audited_line_no) print("data_string:", data_string) + print("58.2%") cells[1].width = Inches(0.1) cells[1].text = data_string @@ -14520,8 +14777,9 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum if not pd.isna(cur_data): # Check if the value is not NaN data = str(cur_data).strip() collection_data.append(data) + print("while loop run",data_index) data_index += 1 - + # print("while loop run",data_index) cells[2].width = Inches(3.5) data = str(data) cells[2].text = '\n '.join(collection_data) @@ -14544,26 +14802,28 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum collection_new_data = [] new_data_index = index - + print("%40") while new_data_index < len(audit_df) and str(audit_df["para_no"][new_data_index]) == str(para_value): if audit_df["line_removed"][new_data_index] == "No": new_data = audit_df['data_corrected'][new_data_index] if not pd.isna(new_data): # Check if the value is not NaN data = str(new_data).strip() collection_new_data.append(data) + print("new line 123") + print("new line",new_data_index) new_data_index += 1 data = str(new_data).strip() cells[4].width = Inches(3.5) data = str(data) cells[4].text = '\n '.join(collection_new_data) - + print("45%") sno = 1 changes_done = False # identification_status = audit_df['Identification_Status'][index] if pd.isnull(audit_df['Identification_Status'][index]) or audit_df['Identification_Status'][index] == "": continue - + print("50%") if audit_df['left_indent_corrected'][index] != 'No': change_comment = audit_df['left_indent_corrected'][index] @@ -14603,7 +14863,7 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum run.add_break() sno += 1 changes_done = True - + print("55%") if audit_df['right_indent_corrected'][index] != 'No': name = ps_to_script_element(audit_df['Identification_Status'][index]) change_comment = audit_df['right_indent_corrected'][index] @@ -14626,9 +14886,15 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum run.add_break() sno += 1 changes_done = True - + print("60%") if audit_df['case_corrected'][index] != 'No': - name = ps_to_script_element(audit_df['Identification_Status'][index]) + print("index 61%",index) + try: + name = ps_to_script_element(audit_df['Identification_Status'][index]) + except Exception as exp: + print("61 problrm is here") + print(exp) + pass string = str(audit_df['case_corrected'][index]) string = string.split() content = string[-1] @@ -14642,6 +14908,7 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum # dataa = data.split() # if dataa[-1] == "nan": # continue + print("index 63%") cells[5].width = Inches(2) para = cells[5].add_paragraph() run = para.add_run() @@ -14649,7 +14916,7 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum run.add_break() sno += 1 changes_done = True - + print("65%") if audit_df['line_wrapped_at_prescribed_right_indent'][index] != 'No': change_comment = 'Line Wrapped at Prescribed Right Indent 1 Inch' name = ps_to_script_element(audit_df['Identification_Status'][index]) @@ -14670,7 +14937,7 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum run.add_break() sno += 1 changes_done = True - + print("70%") if audit_df['line_broken_into_multiple_lines'][index] != 'No': name = ps_to_script_element(audit_df['Identification_Status'][index]) change_comment = f'{name} line Broken into Multiple Lines' @@ -14686,7 +14953,7 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum run.add_break() sno += 1 changes_done = True - + print("75%") if audit_df['line_merged_with_next_line'][index] != 'No': name = ps_to_script_element(audit_df['Identification_Status'][index]) change_comment = f'{name} line Merged with Next Line' @@ -14748,18 +15015,20 @@ def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenum # run = para.add_run() # run.text = data # run.add_break() - + + print("last_line") + print(row_index) row_index += 1 buffer = io.BytesIO() output_doc.save(buffer) buffer.seek(0) - end_time = time.time() + end_time = time.time() print("print_audit_report_tabular_docx End time ", end_time) # print("conv_pdf_to_docx End time ", time.time()) time_taken = end_time - start_time - print("Time taken: ", time_taken) + print("Time taken: print_audit_report_tabular_docx", time_taken) #output_doc.save(audit_report_tabular_docx) return buffer diff --git a/kitchen_counter/scriptAudit/sa_functions_english.py b/kitchen_counter/scriptAudit/sa_functions_english.py index 0795171..0e9fe85 100755 --- a/kitchen_counter/scriptAudit/sa_functions_english.py +++ b/kitchen_counter/scriptAudit/sa_functions_english.py @@ -702,3 +702,53 @@ def run_audit_on_identified_english(df,audit_df): #df = df.sort_values(by=['line_no']).reset_index(drop =True) return df + +def ai_gen_script_to_audited_df(df): + + for index in df.index: + if df['isIdentified'][index] == 'Yes' or df['Identification_Status'][index] == 'blank' : + continue + if str(df['data'][index]).strip() == "" : + df['isIdentified'][index] = 'Yes' + df['Identification_Status'][index] = 'blank' + continue + if df['data'][index].startswith('INT.') or df['data'][index].startswith('EXT.') : + df['Identification_Status'][index] = 'ps1' + df['isIdentified'][index] = 'Yes' + #print(df['data'][index]) + continue + if df['nlb'][index] == 'Y' and df['plb'][index] == 'Y' and df['case'][index] == 'AllUpper': + df['Identification_Status'][index] = 'ps16' + df['isIdentified'][index] = 'Yes' + #print(df['data'][index]) + continue + if df['nlb'][index] == 'Y' and df['plb'][index] == 'Y' : + df['Identification_Status'][index] = 'ps6' + df['isIdentified'][index] = 'Yes' + #print(df['data'][index]) + continue + if df['nlb'][index] == 'Y' and df['plb'][index] == 'N' : + df['Identification_Status'][index] = 'ps15' + df['isIdentified'][index] = 'Yes' + #print(df['data'][index]) + continue + if df['nlb'][index] == 'N' and df['plb'][index] == 'Y' and df['parenthetical'][index] == 'PartMidEnd': + df['Identification_Status'][index] = 'ps8' + df['isIdentified'][index] = 'Yes' + #print(df['data'][index]) + continue + if df['nlb'][index] == 'N' and df['plb'][index] == 'Y' and df['parenthetical'][index] == 'Absent': + df['Identification_Status'][index] = 'ps7' + df['isIdentified'][index] = 'Yes' + #print(df['data'][index]) + continue + if df['nlb'][index] == 'N' and df['plb'][index] == 'N' and df['parenthetical'][index] == 'Complete': + df['Identification_Status'][index] = 'ps10' + df['isIdentified'][index] = 'Yes' + #print(df['data'][index]) + continue + ## identify unidentified as actions + df['Identification_Status'][index] = 'ps6' + df['isIdentified'][index] = 'Yes' + + return df diff --git a/kitchen_counter/scriptAudit/views.py b/kitchen_counter/scriptAudit/views.py index e2a65ba..c9c27a7 100755 --- a/kitchen_counter/scriptAudit/views.py +++ b/kitchen_counter/scriptAudit/views.py @@ -48,6 +48,7 @@ class Get_Counter(LoginRequiredMixin,APIView): print("CURREENT DOMAIN :-----") print(current_site) running_in_production = True + testing_on_dev = False if current_site in ["http://1.6.141.108", "http://1.6.141.104", "http://1.6.141.103", @@ -58,18 +59,31 @@ class Get_Counter(LoginRequiredMixin,APIView): "1.6.141.103", "1.6.141.106", "taj.mynextfilm.in", - "qa.mynextfilm.net", - "https://qa.mynextfilm.net", + # "qa.mynextfilm.net", + # "https://qa.mynextfilm.net", ]: running_in_production = False + + if current_site in ["qa.mynextfilm.net", + "https://qa.mynextfilm.net", + ]: + testing_on_dev = True + session = boto3.Session( aws_access_key_id='AKIAQVLBBGCB45RMLKVW', aws_secret_access_key='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', ) sqs = session.resource('sqs', region_name='ap-south-1') - queue = sqs.get_queue_by_name(QueueName="mnfqueue") - - + + if testing_on_dev == True: + print("#######\n\n\n") + print("Sending files to Development server\n\n\n\n") + queue = sqs.get_queue_by_name(QueueName="devqueue") + else: + queue = sqs.get_queue_by_name(QueueName="mnfqueue") + + + user = str(request.user) screenplay_name = request.data.get('screenplay_name') author = request.data.get('author_name') @@ -97,10 +111,12 @@ class Get_Counter(LoginRequiredMixin,APIView): print(response) except Exception as e: print("Error is", e) - - + print("#######\n\n\n") + print("Sending files to Production server\n\n\n\n") s3_url = f"https://{bucket}.s3.ap-south-1.amazonaws.com/{object_name}" else: + print("#######\n\n\n") + print("Sending files to Local server\n\n\n\n") s3_url = media_path audit_parameters = { "service_type" : "audit", @@ -531,27 +547,27 @@ def run_audit_in_counter(msg): # deleting the folder and files # Delete a file - media_path = os.path.join(settings.MEDIA_ROOT, "audit_counter_files", script_file_name) - if os.path.exists(media_path): - os.remove(media_path) - print(f"File '{media_path}' deleted successfully.") - else: - print(f"File '{media_path}' does not exist.") + # media_path = os.path.join(settings.MEDIA_ROOT, "audit_counter_files", script_file_name) + # if os.path.exists(media_path): + # os.remove(media_path) + # print(f"File '{media_path}' deleted successfully.") + # else: + # print(f"File '{media_path}' does not exist.") - # Delete a folder and its contents - folder1_path = os.path.join(settings.MEDIA_ROOT, "scripts_folder", script_id) - if os.path.exists(folder1_path): - shutil.rmtree(folder1_path) - print(f"Folder '{folder1_path}' and its contents deleted successfully.") - else: - print(f"Folder '{folder1_path}' does not exist.") + # # Delete a folder and its contents + # folder1_path = os.path.join(settings.MEDIA_ROOT, "scripts_folder", script_id) + # if os.path.exists(folder1_path): + # shutil.rmtree(folder1_path) + # print(f"Folder '{folder1_path}' and its contents deleted successfully.") + # else: + # print(f"Folder '{folder1_path}' does not exist.") - folder2_path = os.path.join(settings.MEDIA_ROOT, "audit_folder", script_id) - if os.path.exists(folder2_path): - shutil.rmtree(folder2_path) - print(f"Folder '{folder2_path}' and its contents deleted successfully.") - else: - print(f"Folder '{folder2_path}' does not exist.") + # folder2_path = os.path.join(settings.MEDIA_ROOT, "audit_folder", script_id) + # if os.path.exists(folder2_path): + # shutil.rmtree(folder2_path) + # print(f"Folder '{folder2_path}' and its contents deleted successfully.") + # else: + # print(f"Folder '{folder2_path}' does not exist.") return JsonResponse(data, status=200) # return Response("Success", status=200) @@ -836,7 +852,7 @@ class DownloadScriptFromBlockchain(APIView): else: return JsonResponse({"status":False, "error": "This Transcation Is Not Found On The Blockchain.",},status=500) else: - return JsonResponse({"status":False, "error": "Your Private Key Is Worng", "key": str(userkeys)},status=500) + return JsonResponse({"status":False, "error": "Your Private Key Is Wrong", "key": str(userkeys)},status=500) else: return JsonResponse({"status":False, "error": "Your Wallet is Not Created",},status=500) except Exception as e: