import numpy as np
import pandas as pd
import math
import os
import csv
import subprocess
import io
import shutil
from centralisedFileSystem.models import File, Script
from pathlib import Path
import re
import textwrap
import docx
from docx import Document
from docx.shared import Pt, RGBColor
from docx.shared import Mm,Inches
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT
from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT
from docx.enum.section import WD_ORIENT
from pdf2docx import parse
# import pdftotext
from scriptAudit.exceptions import ScriptAuditException
from utils import utilities
from datetime import date
from PyPDF2 import PdfFileReader, PdfFileWriter
from utils.scripts_functions import countPages
from conversion.translation.detection import script_det, language_detector
from conversion.translation.translation_variables import get_language_script_code, language_code

#mypath= str(Path(__file__).resolve().parent.parent) + "/neutralAudit/matrices/"
mypath= str(Path(__file__).resolve().parent) + "/matrices/"
# mypath = os.getcwd() +'\\'

def convert_to_pdf(input_docx, out_folder):
    """Convert ``input_docx`` to PDF with headless LibreOffice.

    The PDF is written into ``out_folder``. The call blocks until the
    LibreOffice process exits; its exit status is not inspected.
    """
    command = [
        'libreoffice',
        '--headless',
        '--convert-to', 'pdf',
        '--outdir', out_folder,
        input_docx,
    ]
    converter = subprocess.Popen(command)
    converter.communicate()


def check_space_line(value):
    """Return "Y" when ``value`` consists only of whitespace, otherwise "N".

    An empty string yields "N" because ``str.isspace`` is False for it.
    """
    return "Y" if value.isspace() else "N"

def check_space(data):
    """Count the leading space characters (" ") of ``data``.

    Only the plain space counts; a tab or any other character ends the run.
    """
    for position, char in enumerate(data):
        if char != " ":
            # first non-space character: its position equals the run length
            return position
    # every character was a space (or data is empty)
    return len(data)

def get_last_char_pos(data):
    """Return the index of the last non-whitespace character of ``data``.

    Returns ``None`` for an empty string and ``0`` for a string made only of
    whitespace (the first position is then followed by nothing but
    whitespace).

    The result is derived from a single ``rstrip`` instead of re-slicing the
    tail at every position, which made the scan quadratic in the line length.
    """
    if not data:
        return None
    # len(rstrip) - 1 is the last non-whitespace index; clamp the
    # all-whitespace case (-1) to 0.
    return max(len(data.rstrip()) - 1, 0)


def get_case(value):
    """Classify the capitalisation pattern of a line.

    Returns one of:
      "None"       - whitespace-only or empty line
      "AllUpper"   - ``value.isupper()`` is true
      "AllLower"   - ``value.islower()`` is true
      "FirstCamel" - first word starts upper-case but is not all upper-case
      "FirstUpper" - first word is all upper-case and longer than 1 character
      "EndUpper"   - last word is all upper-case and longer than 1 character
      "MidUpper"   - some word is all upper-case and longer than 1 character
      "Partial"    - anything else
    Words are obtained by splitting the left-stripped line on single spaces.
    """
    if check_space_line(value) == "Y":
        return "None"
    if value.isupper():
        return "AllUpper"
    if value.islower():
        return "AllLower"

    tokens = value.lstrip().split(" ")
    head = tokens[0]
    if not head:
        # nothing left after stripping (empty input)
        return "None"

    if head[0].isupper() and not head.isupper():
        return "FirstCamel"
    if head.isupper() and len(head) > 1:
        return "FirstUpper"

    tail = tokens[-1]
    if tail.isupper() and len(tail) > 1:
        return "EndUpper"

    if any(token.isupper() and len(token) > 1 for token in tokens):
        return "MidUpper"
    return "Partial"

def conv_pdf_to_docx(input_script,output_converted_docx):
    """Convert the PDF ``input_script`` to a DOCX at ``output_converted_docx``.

    Delegates to ``pdf2docx.parse`` with ``start=0`` and ``end=None``.
    """
    parse(
        input_script,
        output_converted_docx,
        start=0,
        end=None,
    )
    

def conv_docx_to_txt(input_script,output_converted_txt):
    """Render the paragraphs of a DOCX file as fixed-width plain text.

    ``input_script`` is opened with python-docx ``Document``; the result is
    written to ``output_converted_txt`` as UTF-8. For every paragraph the
    function:

    * converts the paragraph indents (inches) to columns by multiplying by 10
      (presumably 10 characters per inch - TODO confirm),
    * writes a blank line before/after when ``space_before``/``space_after``
      exceed 5pt,
    * skips paragraphs with a line spacing below 5pt that follow a paragraph
      whose total indent was above 20 columns,
    * drops a single-word paragraph containing CONT/CONTD/CONTINUED/CONT'D
      when it carries a section break,
    * left-pads each line by ``indent + left_margin`` spaces, computing the
      pad from the canvas width for centred / right-aligned paragraphs and
      wrapping other lines with ``textwrap.wrap``.

    Section geometry (canvas width, left margin) is re-read whenever a
    paragraph carries a ``w:sectPr`` element.
    """
#     import textwrap

#     from docx import Document
#     from docx.shared import Pt
#     from docx.shared import Mm

    read_doc = Document(input_script)
    # print(read_doc._body._body.xml)
    #section= read_doc.sections[-1]
    def recalculate_section_properties(n):
        """Return ``(section, canvas_width, left_margin)`` for section ``n``.

        Widths are in tenths of an inch. On any failure the section is
        reported as ``None`` with defaults of 65 and 15.
        """


        try:
            section = read_doc.sections[n]
            section_width_inches = section.page_width.inches
            #section_width = int(section.page_width.inches * 10)
            # print("section width direct ",section.page_width.inches)
            # print(section.left_margin.inches,section.right_margin.inches)
            margins_inches = section.left_margin.inches + section.right_margin.inches
            #margins = int((section.left_margin.inches + section.right_margin.inches)*10)
            print(margins_inches)
            canvas_width_inches = section_width_inches - margins_inches
            canvas_width = int(canvas_width_inches *10)
            print("canvas width",canvas_width)
            left_margin = int(section.left_margin.inches * 10)
        except:
            # any failure (missing section, missing margin, ...) falls back to defaults
            section = None
            canvas_width = 65
            left_margin = 15

        return section,canvas_width,left_margin

    #for para in read_doc.paragraphs:
    #    n= 0
    #    p = para._p
    #    sectPrs = p.xpath("./w:pPr/w:sectPr")
    #    if sectPrs:
    #        n = n +1
    #        print("Section changed after para")
    #        print(para.text)
    #        section,canvas_width,left_margin = recalculate_section_properties(n)
    #        print(section.left_margin.inches)


    # n is the index of the section the current paragraph belongs to
    n = 0
    try:
        section,canvas_width,left_margin = recalculate_section_properties(n)


        # raises AttributeError when the helper fell back to section=None,
        # which triggers the retry with the last section below
        print(section.left_margin.inches)
    except:
        n =-1
        section,canvas_width,left_margin = recalculate_section_properties(n)

    all_paras = read_doc.paragraphs
    # NOTE(review): `first` is never used, and indexing [0] raises IndexError
    # for a document without paragraphs.
    first = all_paras[0].paragraph_format
    #print(first.left_indent)
    #count = 1
    print("number of paras",len(all_paras))
    #left_margin = 15


    # indent + left margin of the previously written paragraph
    previous_indent= 0
    with open(output_converted_txt, 'w', encoding='utf-8') as f:
        for para in all_paras:
            print('\n')
            paragraph_format = para.paragraph_format
    ## using the paragraph spacing add blank line if required

            # spacing attributes are None when not set explicitly; treat as 0
            try:
                space_before = paragraph_format.space_before.pt
            except:
                space_before = 0.0

            try:
                space_after = paragraph_format.space_after.pt
            except:
                space_after = 0.0

            print("space before")
            print(space_before)
            print("space after")
            print(space_after)
            try:
                print("line spacing ",paragraph_format.line_spacing.pt)
                print("line spacing rule ",paragraph_format.line_spacing_rule)
                # skip paragraphs with tiny line spacing that follow a deeply
                # indented paragraph
                if paragraph_format.line_spacing.pt < 5 and previous_indent > 20:
                    continue
                #print("space before",paragraph_format.space_before.pt)
            except:
                pass

            section_changed = False
            try:
                ####check section end and remove if CONTINUED
                p = para._p
                # a w:sectPr inside the paragraph properties marks a section break
                sectPrs = p.xpath("./w:pPr/w:sectPr")
                if sectPrs:
                    section_changed = True
                    print("checking for continued at section change")
                    text = para.text.split(' ')
                    print(text)
                    if len(text) == 1:
                        skip_words = ['CONT','CONTD','CONTINUED',"CONT'D"]
                        ## to be replaced by regex ,match
                        found_continue = False
                        for skip_word in skip_words:
                            if skip_word in text[0]:
                                #skip para
                                print("found continued")
                                found_continue = True
                                break
                        if found_continue:
                            print("skipping para but setting new section")
                            n= n+1
                            try:
                                section,canvas_width,left_margin = recalculate_section_properties(n)
                                print(section.left_margin.inches)
                            except Exception as e:
                                print(e)
                            # the paragraph itself is not written
                            continue
                    print("Continued not found at section change")


            except:
                pass


            if float(space_before) > 5.0 :
                print("adding blank line")
                f.write('\n')

            # first-line indent, left indent, right indent (inches); 0 when unset
            fli =0
            li =0
            ri =0
            try:
                if para.style.name == 'List Paragraph':
                    fli = 0
                else:
                    fli = paragraph_format.first_line_indent.inches

            except:
                pass
            try:

                li = paragraph_format.left_indent.inches
            except:
                pass

            try:

                ri = paragraph_format.right_indent.inches
            except:
                pass

            # indent in tenths of an inch
            indent = int((fli + li ) * 10)
            print("calculated indent ",indent)

            data = para.text
            lines = data.split('\n')
            print("Examining para")
            try:
                print(para.text)
                print(para.style.name)
            except:
                pass

            print("lines in para",len(lines))
            ## remove starting number (before margin) and number after 65 characters
            if len(lines) == 1 :
                if indent < 0:
                    print(indent)
                    #start = -(indent )
                    #if re.search('\d',lines[0][0:start]):
                    #    lines[0] = " ".join(lines[0].split()[1:])
                    #else:
                    #    lines[0] = lines[0][start:]
                    indent = 0
                lines[0] = lines[0].rstrip()

                if len(lines[0]) > 40:
                    # drop the last two characters when they contain a digit
                    # and everything between column 40 and them is blank
                    if lines[0][40:-2].strip() == '' and re.search('\d',lines[0][-2:]) :
                        lines[0] = lines[0][0:-2]

            print(indent)

            for line in lines:
                #line = line.rjust(len(line) + indent + left_margin)

                line = line.replace('\t','     ')
                # no paragraph indent: fall back to the line's own leading spaces
                if indent == 0:
                    indent = check_space(line)

                line = line.strip()
                if line:
                    #print(line)
                    print(fli,li,indent,ri)
                    print(para.alignment)
                    try:
                        width = int(canvas_width - (indent + ri*10))
                    except:
                        width = 58 - indent

                    # NOTE(review): the comparisons below rely on the string form
                    # of the python-docx alignment enum - confirm against the
                    # installed python-docx version.
                    #if fli == 0 and li == 0 and str(para.alignment) == 'CENTER (1)':
                    if str(para.alignment) == 'CENTER (1)' :
                        ch_count = len(line)
                        print("line is center aligned")
                        print(ch_count)
                        indent = indent + int((width-ch_count)/2)
                        print(indent)

                    #elif fli == 0 and li == 0 and str(para.alignment) == 'RIGHT (2)':
                    elif str(para.alignment) == 'RIGHT (2)':
                        ##removing fli li =
                        ch_count = len(line)
                        print("line is right aligned")
                        print(ch_count)
                        indent = indent + int(width-ch_count)

                        print(indent)

                    else:
                        if str(para.alignment) == 'JUSTIFY (3)':
                            # collapse runs of whitespace
                            line = ' '.join(line.split())
                        print("line is left aligned")
                        if indent+left_margin > 55:
                            indent = indent - 1

                        # textwrap.wrap requires a positive width
                        if width <= 0:
                            width = 1
                        print("Calculated Width:",width)
                        wrapped_lines = textwrap.wrap(line, width)
                        wrapped_data_lines_count = len(wrapped_lines)
                        if wrapped_data_lines_count > 1:
                            print("need to wrap line")

                            for wrapped_line in wrapped_lines:
                                #print(wrapped_line)
                                # rjust to (own length + pad) == left-pad with spaces
                                wrapped_line = wrapped_line.rjust(len(wrapped_line) + indent + left_margin)

                                print(indent+left_margin)
                                #print(wrapped_line)
                                f.write(wrapped_line)
                                f.write('\n')
                            continue

                    # left-pad the (unwrapped) line with indent + left_margin spaces
                    line = line.rjust(len(line) + indent + left_margin)
                    f.write(line)
                    f.write('\n')

                else:
                    print("line is blank")
                    f.write(line)
                    f.write('\n')

            ####check section end
            #p = para._p
            #sectPrs = p.xpath("./w:pPr/w:sectPr")
            if section_changed:
                n = n +1
                print("Section changed")
                section,canvas_width,left_margin = recalculate_section_properties(n)
                # NOTE(review): not guarded - raises AttributeError when the
                # helper returned section=None.
                print(section.left_margin.inches)


            if space_after >5.0:
                print("adding blank line")
                f.write('\n')


            print("\n")
            previous_indent = indent + left_margin

    print("Converted to text")


def conv_pdf_to_txt(input_script,output_converted_txt):
    """Write the text of the PDF ``input_script`` to ``output_converted_txt``.

    NOTE(review): the ``pdftotext.PDF(f)`` call that used to create ``pdf`` is
    commented out, so ``pdf`` is undefined and the first use below raises
    ``NameError``. As written this function cannot succeed; restore the
    extraction step (or remove the function) before calling it.
    """

    # Load your PDF
    with open(input_script, "rb") as f:
        # pdf = pdftotext.PDF(f) #06-2-24
        pass
    # # If it's password-protected
    # with open("secure.pdf", "rb") as f:
    #     pdf = pdftotext.PDF(f, "secret")

    # How many pages?
    # `pdf` is not defined at this point (see docstring)
    print(len(pdf))

#     # Iterate over all the pages
#     for page in pdf:
#         print(page)

    # Read some individual pages
#     print(pdf[0])
#     print(pdf[1])

    # Read all the text into one string
    #print("\n\n".join(pdf))
    # pages are joined with a blank line between them
    txt_data = "\n\n".join(pdf)

    with open(output_converted_txt, "w", encoding="utf8") as out_file:
        out_file.write(txt_data)


def conv_pdf_to_txt_java(input_script,output_converted_txt):
    """Extract the text of a PDF through a py4j Java gateway.

    Calls ``entry_point.strip(input_script)`` on a ``JavaGateway`` and writes
    ``str(result['payload'])`` to ``output_converted_txt`` as UTF-8.

    According to the original inline notes the gateway result is a mapping:
      'success': 'true' or 'false'
      'payload': pdf file content if 'success' is 'true'
      'error':   error message if 'success' is 'false'
    Only 'payload' is read here; 'success'/'error' are not checked.
    """
    # imported lazily so the module loads without py4j installed
    from py4j.java_gateway import JavaGateway

    gateway = JavaGateway()
    result = gateway.entry_point.strip(input_script)

    print(result['payload'])
    # context manager so the output handle is flushed and closed
    with open(output_converted_txt, "w", encoding="utf8") as out_file:
        out_file.write(str(result['payload']))


def conv_to_txt(input_script, output_converted_docx, output_converted_txt):
    """Convert a script file to plain text, dispatching on its extension.

    Args:
        input_script: path of the source file (.txt, .pdf, .docx or .fdx).
        output_converted_docx: path for the intermediate DOCX; only used for
            PDF input.
        output_converted_txt: path of the text file to produce.

    Raises:
        ScriptAuditException: when the extension is not one of the above.

    The extension is matched case-insensitively. FDX input is closed after
    reading and its text is written as UTF-8, matching the encoding the other
    converters write and the readers in this module expect.
    """
    extention = input_script.rsplit(".", 1)[-1]
    kind = extention.lower()

    if kind == "txt":
        shutil.copyfile(input_script, output_converted_txt)

    elif kind == "pdf":
        # PDFs are routed through an intermediate DOCX
        conv_pdf_to_docx(input_script, output_converted_docx)
        conv_docx_to_txt(output_converted_docx, output_converted_txt)

    elif kind == "docx":
        conv_docx_to_txt(input_script, output_converted_txt)

    elif kind == "fdx":
        with open(input_script, 'r') as fdx:
            plain_txt = utilities.fdx_to_txt(fdx)
        with open(output_converted_txt, 'w', encoding='utf-8') as f:
            f.write(plain_txt)

    else:
        raise ScriptAuditException(f"{extention} file is not supported for Audit!")


def conv_to_df(txt_script)  :
    """Load a text script into a DataFrame with one row per line.

    The file is read as UTF-8 and split on "\n". Columns:
      line_no               - 1-based line number as the string of a float
                              ("1.0", "2.0", ...)
      data                  - the line text
      Identification_Status - ''
      isIdentified          - 'No'

    The frame is built in one step; the file handle is closed after reading.
    """
    fields = ['line_no','data','Identification_Status','isIdentified']

    with open(txt_script, 'r', encoding="utf-8") as script_file:
        script_lines = script_file.read().split("\n")

    return pd.DataFrame(
        {
            'line_no': [str(float(number)) for number in range(1, len(script_lines) + 1)],
            'data': script_lines,
            'Identification_Status': '',
            'isIdentified': 'No',
        },
        columns=fields,
    )


def conv_to_csv(txt_script,csv_for_processing)  :
    """Write a text script to a CSV file with one row per line.

    The input is read as UTF-8 and split on "\n". The CSV has the header
    ``line_no,data,Identification_Status,isIdentified`` followed by one row
    per line: the 1-based line number as the string of a float ("1.0", ...),
    the line text, '' and 'No'.

    The output is opened once, as UTF-8, for both header and rows.
    """
    fields = ['line_no','data','Identification_Status','isIdentified']

    with open(txt_script, 'r', encoding="utf-8") as script_file:
        script_lines = script_file.read().split("\n")

    with open(csv_for_processing, 'w', encoding='utf-8', newline='') as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)
        csvwriter.writerows(
            [str(float(number)), text, '', 'No']
            for number, text in enumerate(script_lines, start=1)
        )


def pre_assign_wts(df):
    """Split "SPEAKER: dialogue" style lines into separate rows with preset weights.

    Candidate rows are those whose ``data`` contains ':-', ':', '-' or a
    double-quoted span and none of the ``skip_words``. Each candidate is split
    into a speaker part (optionally with a trailing parenthetical) and a
    dialogue part. When the speaker part is upper-case, has no digit and the
    dialogue part is non-empty:

    * the row keeps only the speaker text and gets weight 'ps7-20',
    * the parenthetical, if any, is inserted at index + 0.3 with 'ps10-20',
    * the dialogue is inserted at index + 0.6 with 'ps13-20;ps14-20;ps15-20'.

    Finally the frame is sorted by index, re-indexed from 0 and ``line_no`` is
    renumbered 1.0, 2.0, ... Returns the new frame.

    NOTE(review): assumes ``df`` already has a 'preassigned_weights' column and
    a numeric ``line_no`` (``line_no + 0.3`` is computed) - confirm with the
    caller. The writes of the form ``df[col][index] = value`` are chained
    assignments; see the pandas indexing documentation.
    """

    skip_words = ['INT.','EXT.','I/E','E/I','CUT TO','CUT BACK TO','FLASHCUT TO','DISSOLVE TO', 'INTERCUT', 'INTER CUT','PBS', 'INTERVAL',
                  'FLASHBACK','FADE IN','FADE TO BLACK','ON THE SCREEN','ON THE TV','MORNING','AT HOTEL','TV','MONTAGES','MUSICAL MONTAGES','ESSENTIALS','LATER','ESSENTIAL']
    # skip_words are joined into a regex alternation, so the '.' in 'INT.' /
    # 'EXT.' matches any character
    pos_sp_dial_line_nos = df.loc[(df['data'].str.strip().str.contains(r':-|:|-|".*"') == True) & (df['data'].str.strip().str.contains('|'.join(skip_words)) == False) ,'line_no'].to_list()
    print(pos_sp_dial_line_nos)
    # same list object as pos_sp_dial_line_nos (alias, not a copy)
    new_pos_sp_dial_line_nos =pos_sp_dial_line_nos
    for index in df.loc[df['line_no'].isin(pos_sp_dial_line_nos),:].index:
        data = df['data'][index]
        line_no = 0.0
        new_line_no = 0.0
        pos_sp_par = ''
        line_no = df['line_no'][index]
        pos_sp =''
        pos_par = ''
        pos_dia = ''
        pos_sp_par = ''

        print(df.dtypes)
        try:
            print(data)
        except:
            pass

        # separators are tried in priority order; text before the first
        # separator is speaker(+parenthetical), text after the last is dialogue
        if ":-" in data:
            pos_sp_par = data.split(":-")[0]
            pos_dia = data.split(":-")[-1].strip()
        elif ":" in data:
            pos_sp_par = data.split(":")[0]
            pos_dia = data.split(":")[-1].strip()
        elif "-" in data:
            pos_sp_par = data.split("-")[0]
            pos_dia = data.split("-")[-1].strip()
        elif "\"" in data:
            pos_sp_par = data.split("\"")[0]
            # second-to-last piece, i.e. the text before the final quote
            pos_dia = data.split("\"")[-2].strip()

        pos_sp_par = pos_sp_par.strip()

        if pos_sp_par:
            #print(pos_sp_par)
            if "(" in pos_sp_par and ")" in pos_sp_par:
                pos_sp = pos_sp_par.split("(")[0]
                pos_par = "(" + pos_sp_par.split("(")[-1]
            else:
                pos_sp = pos_sp_par
                pos_par = ''

        print(pos_sp)
        print(pos_par)
        print(pos_dia)

        if pos_sp:
            has_digit = any(chr.isdigit() for chr in pos_sp)
            if not has_digit and pos_sp.isupper() and pos_dia.strip():
            #if pos_dia.strip():
                df['data'][index] = pos_sp
                df['preassigned_weights'][index] ='ps7-20'
                if pos_par:
                    # fractional index labels place the new row right after
                    # `index` once the frame is sorted by index
                    df.loc[index + 0.3] = np.nan
                    df.loc[index + 0.3,'data'] = pos_par
                    new_line_no = line_no + 0.3
                    df.loc[index + 0.3,'line_no'] = new_line_no
                    df.loc[index + 0.3,'isIdentified'] = 'No'
                    df.loc[index + 0.3,'preassigned_weights'] = 'ps10-20'

                    new_pos_sp_dial_line_nos.append(new_line_no)
                    print("split pos_par",df.loc[index + 0.3,'line_no'])
                if pos_dia:
                    print("1",df.dtypes)
                    df.loc[index + 0.6] = np.nan
                    print("1.5",df.dtypes)
                    df.loc[index + 0.6,'data'] = pos_dia
                    new_line_no = line_no + 0.6
                    print(type(line_no),type(new_line_no))
                    df.loc[index + 0.6,'line_no'] = new_line_no
                    print("2",df.dtypes)
                    df.loc[index + 0.6,'isIdentified'] = 'No'
                    df.loc[index + 0.6,'preassigned_weights'] = 'ps13-20;ps14-20;ps15-20'

                    new_pos_sp_dial_line_nos.append(new_line_no)
                    print("split pos_dia",df.loc[index + 0.6,'line_no'],type(df.loc[index + 0.6,'line_no']))
                    print("3",df.dtypes)
    # put the inserted rows into position, then renumber from 1.0
    df = df.sort_index().reset_index(drop=True)
    for index in df.index:
        df['line_no'][index] = float(index + 1)

    return df


def create_audit_df(df):
    """Build the audit-tracking frame for ``df``.

    The result is indexed by ``line_no`` and holds ``data`` plus the audit
    columns: four text columns initialised to '' and the flag columns
    initialised to 'No'. ``df`` itself is not modified.

    The two source columns are copied explicitly so the new columns are added
    to an independent frame rather than to a slice of ``df``.
    """
    text_columns = [
        'Identification_Status',
        'data_corrected',
        'audited_line_no',
        'scene_number',
    ]
    flag_columns = [
        'line_removed',
        'introduction',
        'appendix',
        'page_no',
        'left_indent_corrected',
        'right_indent_corrected',
        'line_wrapped_at_prescribed_right_indent',
        'case_corrected',
        'blank_inserted_before',
        'blank_inserted_after',
        'blank_deleted_before',
        'blank_deleted_after',
        'space_removed_between_characters',
        'space_added_between_characters',
        'line_merged_with_next_line',
        'line_broken_into_multiple_lines',
        'punctuation_mark_added',
        'punctuation_mark_removed',
        'language_specific_audit_comments',
    ]

    audit_df = df[['line_no','data']].copy()
    for column in text_columns:
        audit_df[column] = ''
    for column in flag_columns:
        audit_df[column] = 'No'

    audit_df.set_index('line_no',inplace=True)

    return audit_df

def trim_intro(df,audit_df):
    """Drop the title/introduction that precedes the first "FADE IN".

    Rows of ``df`` are scanned in index order; whitespace runs in ``data`` are
    collapsed before a case-insensitive search for each stop word. At the
    first match whose index is <= 100, every row with index label from the
    match down to 0 is marked in ``audit_df`` (``line_removed`` and
    ``introduction`` set to 'Yes', keyed by the row's ``line_no``) and dropped
    from ``df`` in place. A match at an index above 100 is ignored.

    ``df`` is expected to carry integer index labels starting at 0.
    ``audit_df`` is written through ``.loc`` so the update is applied to the
    frame itself rather than to an intermediate column object.
    """
    stopwords = ['FADE IN' ]

    for index in df.index:
        data = ' '.join(df['data'][index].split())
        if not any(re.search(sw, data, re.IGNORECASE) for sw in stopwords):
            continue

        print("Found Fade In",index)
        if index > 100:
            # too far into the script to be an introduction marker
            continue

        print("removing lines till ", index)
        doomed_labels = list(range(int(index), -1, -1))
        for label in doomed_labels:
            line_no = df.loc[label, 'line_no']
            audit_df.loc[line_no, 'line_removed'] = 'Yes'
            audit_df.loc[line_no, 'introduction'] = 'Yes'
        df.drop(doomed_labels, inplace=True)
        print("title and introduction removed")
        return


def remove_page_numbers(df,audit_df):
    """Flag page-number lines in ``audit_df``.

    A row of ``df`` counts as a page number when its ``data`` starts with more
    than 54 spaces and the stripped text is non-empty and made only of digits
    and dots. For such rows ``line_removed`` and ``page_no`` are set to 'Yes'
    in ``audit_df`` (keyed by the row's ``line_no``). ``df`` is not modified.

    Each row is judged on its own text: a whitespace-only line with a wide
    indent is not a page number, whatever the previous row was. ``audit_df``
    is written through ``.loc`` so the update reaches the frame itself.
    """
    for index in df.index:
        data = df['data'][index]

        # number of leading " " characters (same measure as check_space)
        leading_spaces = len(data) - len(data.lstrip(' '))
        if leading_spaces <= 54:
            continue

        if not re.fullmatch(r'[\d.]+', data.strip()):
            continue

        line_no = df['line_no'][index]
        audit_df.loc[line_no, 'line_removed'] = 'Yes'
        audit_df.loc[line_no, 'page_no'] = 'Yes'


def get_per_uppercase(text):
    """Return the percentage (truncated int) of upper-case characters in ``text``.

    The text is stripped first; the percentage is taken over every remaining
    character, not only letters. Empty or whitespace-only text yields 0.
    """
    stripped = text.strip()
    if not stripped:
        return 0
    upper_total = sum(1 for char in stripped if char.isupper())
    return int(upper_total / len(stripped) * 100)
def prep_for_audit(df):
    """Annotate every line of ``df`` with the features the audit relies on.

    The index is reset to 0..n-1, missing ``data`` becomes '' and any of the
    feature columns that do not exist yet are created. Then, per row:

      Identification_Status - 'blank' for empty / whitespace-only lines or
                              lines with >= 140 leading spaces
      plb / nlb             - "Y"/"N": previous / next line is blank
      ssc                   - leading space count (``check_space``)
      lcp                   - last character position (``get_last_char_pos``)
      case                  - ``get_case`` result
      per_uppercase         - ``get_per_uppercase`` result
      parenthetical         - how parentheses appear on the line ('Complete',
                              'PartStartMid', 'StartingLeft', 'PartMidEnd',
                              'EndingRight', 'PartMidMid', 'MixedLeft',
                              'MixedRight' or 'Absent')
      pnbl_line_no          - line_no of the previous line, or of the line two
                              back when the previous one is blank (0 at start)
      nnbl_line_no          - same looking forward (100000 at the end)
      ppnbl_line_no         - pnbl_line_no of the pnbl line
      nnnbl_line_no         - nnbl_line_no of the nnbl line
      pdil_line_no          - nearest previous non-blank line with a different
                              ``ssc`` (0 if none)
      ndil_line_no          - nearest next non-blank line with a different
                              ``ssc`` (100000 if none)

    Returns the same ``df`` object.

    NOTE(review): the writes of the form ``df[col][index] = value`` and the
    ``df['data'].fillna(..., inplace=True)`` call are chained assignments;
    see the pandas indexing documentation before upgrading pandas.
    """
    df.reset_index(inplace=True, drop=True)
    import re
    print("Entering prep_for_audit")

    df['data'].fillna('',inplace =True)

    # create any feature column that is not present yet
    if 'scene_number' not in df.columns:
        df['scene_number'] = ''
    if 'Identification_Status' not in df.columns:
        df['Identification_Status'] = ''
    if 'plb' not in df.columns:
        df['plb'] = ''
    if 'nlb' not in df.columns:
        df['nlb'] = ''
    if 'ssc' not in df.columns:
        df['ssc'] = ''
    if 'lcp' not in df.columns:
        df['lcp'] = 0
    if 'case' not in df.columns:
        df['case'] = ''
    if 'per_uppercase' not in df.columns:
        df['per_uppercase'] = ''
    if 'parenthetical' not in df.columns:
        df['parenthetical'] = ''
    if 'pnbl_line_no' not in df.columns:
        df['pnbl_line_no'] = ''
    if 'nnbl_line_no' not in df.columns:
        df['nnbl_line_no'] = ''
    if 'ppnbl_line_no' not in df.columns:
        df['ppnbl_line_no'] = ''
    if 'nnnbl_line_no' not in df.columns:
        df['nnnbl_line_no'] = ''
    if 'pdil_line_no' not in df.columns:
        df['pdil_line_no'] = ''
    if 'ndil_line_no' not in df.columns:
        df['ndil_line_no'] = ''
    print("prep_for_audit- after if")
    #print(str(df['line_no']))
    print("593")
    print(df)
    # pass 1: per-line features and immediate non-blank neighbours
    for index in df.index:
        #print(index)
        data=df['data'][index]
        #print(data)
        # a line counts as blank when empty, whitespace-only, or indented by
        # at least 140 spaces
        if check_space(data) >= 140 or data.isspace() or (not data ):
            df['Identification_Status'][index] = 'blank'


        first_line = False
        last_line = False

        if index == 0 :
            first_line = True
            plb = "N"
        else:
            pvs_data = df['data'][index-1]

        if index == df.index[-1]:
            last_line = True
            nlb = "N"
        else:
            next_data = df['data'][index+1]

        print("616")
        if (not first_line):
            if check_space(pvs_data) >= 140 or pvs_data.isspace() or (not pvs_data ):
                plb = "Y"
            else:
                plb = "N"

        #print(plb)

        if (not last_line):
            if check_space(next_data) >= 140 or next_data.isspace() or (not next_data ):
                nlb = "Y"
            else:
                nlb = "N"

        #print(nlb)

        print("633")
        cur_indent = check_space(data)
        lcp = get_last_char_pos(data)
        case = get_case(data)
        per_uppercase = get_per_uppercase(data)

        # classify where parentheses appear on the stripped line
        par = ''
        if re.match('\(',data.strip()[:1]):
            if re.match('\)',data.strip()[-1:]) :
                par = 'Complete'
            elif re.search('\)',data.strip()) :
                par = 'PartStartMid'
            else:
                par = 'StartingLeft'

        elif re.match('\)',data.strip()[-1:]):
            if re.search('\(',data.strip()):
                par = 'PartMidEnd'
            else:
                par = 'EndingRight'
        # beginning end already checked so now if paren present it is mixed
        elif re.search('\(',data.strip()) and re.search('\)',data.strip()):
            par = 'PartMidMid'
        elif re.search('\(',data.strip()):
            par = 'MixedLeft'
        elif re.search('\)',data.strip()):
            par = 'MixedRight'
        else:
            par = 'Absent'
        print("660")

        df['plb'][index] = plb
        df['nlb'][index] = nlb
        df['ssc'][index] = cur_indent
        df['lcp'][index] = lcp
        df['case'][index] = case
        df['parenthetical'][index] = par
        df['per_uppercase'][index] = per_uppercase
        ## pnlb ?
        # previous non-blank line: the previous line, or the one before it when
        # the previous line is blank (that line is not itself checked); 0 = none
        if first_line:
            pnbl_line_no = 0
        elif plb == 'N':
            pnbl_line_no = df['line_no'][index -1]
        elif index - 1 == 0:
            pnbl_line_no = 0
        else:
            pnbl_line_no = df['line_no'][index -2]
        print("678")
        ## nnlb ?
        # next non-blank line, mirrored; 100000 = none
        if last_line:
            nnbl_line_no = 100000
        elif nlb == 'N':
            nnbl_line_no = df['line_no'][index +1]
        elif index + 1 == df.index[-1]:
            nnbl_line_no = 100000
        else:
            try:
                nnbl_line_no = df['line_no'][index +2]
            except Exception as e:
                # NOTE(review): this handler repeats a lookup with a float key
                # and prints nnbl_line_no, which at this point still holds the
                # previous row's value (or is unbound on the first row) -
                # it looks like leftover debugging; confirm and clean up.
                print("Exception--",e)
                i = float(index) + 2
                print("691",i,index)
                print(str(df['line_no']))
                print("692",df['line_no'][i])

                print(nnbl_line_no)
        print("694")
        df['pnbl_line_no'][index] = pnbl_line_no
        df['nnbl_line_no'][index] = nnbl_line_no

    print("prep_for_audit- after 1st for loop")
    # pass 2: second-order neighbours, looked up through line_no
    for index in df.index:
        line_no =  df['line_no'][index]
        pnbl_line_no = df['pnbl_line_no'][index]
        if pnbl_line_no == 0:
            ppnbl_line_no = 0
        else:
            ppnbl_line_no = df.loc[df['line_no'] == pnbl_line_no, 'pnbl_line_no'].values[0]

        nnbl_line_no = df['nnbl_line_no'][index]
        print(index,line_no,pnbl_line_no,nnbl_line_no)
        if nnbl_line_no == 100000:
            nnnbl_line_no = 100000
        else:
            nnnbl_line_no = df.loc[df['line_no'] == nnbl_line_no, 'nnbl_line_no'].values[0]

        df['ppnbl_line_no'][index] = ppnbl_line_no
        df['nnnbl_line_no'][index] = nnnbl_line_no
    print("prep_for_audit- after 2nd for loop")
    # pass 3: previous different-indent line
    for index in df.index:

        data=df['data'][index]
        pdil_line_no = 0
        cur_indent = df['ssc'][index]

        ##pdil
        ## lets find previous different indent line
        print(index,"looking for previous different indent line")
        if index == 0:
            df['pdil_line_no'][index] = pdil_line_no
            continue

        # walk backwards until a non-blank line with another indent is found
        pdil_index = index - 1
        while pdil_index >= 0 :
            pdil_indent = df['ssc'][pdil_index]
            print(cur_indent,pdil_indent)
            if df['Identification_Status'][pdil_index] != 'blank' and pdil_indent != cur_indent:
                pdil_line_no = df['line_no'][pdil_index]
                break
            else:
                pdil_index -= 1

        df['pdil_line_no'][index] = pdil_line_no
    print("prep_for_audit- after 3rd for loop")
    # pass 4: next different-indent line
    for index in df.index:

        data=df['data'][index]
        ndil_line_no = 100000
        cur_indent = df['ssc'][index]

        print("looking for next different indent line")
        if index == df.index[-1]:
            df['ndil_line_no'][index] = ndil_line_no
            continue

        ndil_index = index + 1
        # ndil
        # walk forwards until a non-blank line with another indent is found
        while ndil_index <= df.index[-1]:
            ndil_indent = df['ssc'][ndil_index]
            print(cur_indent,ndil_indent)
            if df['Identification_Status'][ndil_index] != 'blank' and ndil_indent != cur_indent:
                ndil_line_no = df['line_no'][ndil_index]
                break
            else:
                ndil_index += 1

        df['ndil_line_no'][index] = ndil_line_no

    return df

def remove_extra_blank_lines(df,audit_df):
    """Mark runs of consecutive blank lines so that only the last one is kept.

    ``df`` is expected to carry integer index labels starting at 0. For every
    row except the last:

    * blank row followed by a blank row: the row is marked in ``audit_df``
      (``line_removed`` = 'Yes', ``Identification_Status`` = 'blank', keyed by
      its ``line_no``);
    * blank row followed by a non-blank row: ``plb`` of that row is set to
      'N' in ``df``.

    An empty ``df`` is left untouched. Writes go through ``.loc`` so they
    reach the frames themselves rather than intermediate column objects.
    """
    if df.empty:
        return

    for index in range(0,df.index[-1]):
        current_is_blank = not df['data'][index].strip()
        if not current_is_blank:
            continue

        next_is_blank = not df['data'][index+1].strip()
        if next_is_blank:
            line_no = df['line_no'][index]
            audit_df.loc[line_no, 'line_removed'] = 'Yes'
            audit_df.loc[line_no, 'Identification_Status'] = 'blank'
        else:
            df.loc[index, 'plb'] = 'N'

def remove_blank_line_after_parenthetical(df,audit_df):
    """Flag the blank line that directly follows a closed parenthetical.

    For every row whose 'parenthetical' value is 'Complete' or 'EndingRight'
    and whose following row is blank, the following row is flagged in
    ``audit_df`` (``line_removed = 'Yes'``) and ``df['nlb']`` of the
    parenthetical row is set to ``'N'``.

    Both frames are modified in place and nothing is returned.

    Args:
        df: frame with 'data', 'line_no', 'parenthetical' and 'nlb' columns.
            Rows are visited with ``range(0, df.index[-1])``, i.e. the index
            labels are used as 0-based consecutive integers.
        audit_df: frame with a 'line_removed' column, addressed by the
            ``line_no`` value as index label.
    """
    # An empty frame has no last index label to iterate up to.
    if df.empty:
        return

    for index in range(0, df.index[-1]):
        data = df.at[index, 'data']
        nl_data = df.at[index + 1, 'data']
        nl_line_no = df.at[index + 1, 'line_no']

        # Debug output only; printing must never abort the clean-up.
        try:
            print(data)
        except Exception:
            pass

        if df.at[index, 'parenthetical'] in ('Complete','EndingRight') and not nl_data.strip():
            # Writes use .at on the frame itself: chained indexing
            # (frame[col][label] = value) may write to a temporary copy.
            audit_df.at[nl_line_no, 'line_removed'] = 'Yes'
            df.at[index, 'nlb'] = 'N'


def merge_broken_lines(df,audit_df):
    """Join a line with its next non-blank line when the two look like one wrapped line.

    For every row except the last, the next non-blank line is looked up via
    ``nnbl_line_no``. ``indent_dif`` is that line's 'ssc' minus the current
    line's 'lcp'. The merge branch requires ``0 < indent_dif <= 3``, neither
    line being 'AllUpper', and a merged length below 150 characters. When it
    runs it stores the joined text and its case in ``df``, records the merge
    and the removal of the absorbed line in ``audit_df`` and skips the
    absorbed rows.

    NOTE(review): ``cur_case`` is hard-coded to 'AllUpper' below, so the merge
    condition can never be true and the function currently only prints
    diagnostics. Confirm whether this is a deliberate switch-off.

    Args:
        df: frame with 'data', 'ssc', 'lcp', 'nlb', 'case', 'line_no' and
            'nnbl_line_no' columns; rows are addressed by integer index label.
        audit_df: frame with 'line_merged_with_next_line' and 'line_removed'
            columns, addressed by line number.
    """

    # An explicit iterator is used so the merge branch can skip rows with next().
    index_iter = iter(range(0,df.index[-1]))
    for index in index_iter:

        cur_line_data = df['data'][index]
        cur_line_indent = df['ssc'][index]
        # NOTE(review): constant instead of df['case'][index]; this makes the
        # `cur_case != 'AllUpper'` test below always False.
        cur_case = 'AllUpper'
        cur_lcp = df['lcp'][index]
        nnbl_line_no = df['nnbl_line_no'][index]
        nlb = df['nlb'][index]
#         if nlb == 'Y':
#             next_nbl_index = index +2
#             if next_nbl_index > df.index[-1]:
#                 continue
#         else:
#             next_nbl_index = index +1


        # Fetch the next non-blank line by line number. If no row matches,
        # fall back to empty data / indent 0; next_nbl_case is NOT reset in
        # that case and keeps whatever an earlier iteration assigned.
        try:
            next_nbl_data = df.loc[df['line_no'] == nnbl_line_no , 'data'].values[0]
            next_nbl_indent = df.loc[df['line_no'] == nnbl_line_no , 'ssc'].values[0]
            next_nbl_case = df.loc[df['line_no'] == nnbl_line_no , 'case'].values[0]
        except:
            next_nbl_data = ''
            next_nbl_indent = 0

        line_no = df['line_no'][index]
        #next_nbl_line_no = df['line_no'][next_nbl_index]

        two_line_data = ''
        # Gap between where the current line ends and where the next one starts.
        indent_dif  = next_nbl_indent - cur_lcp
        print(line_no,indent_dif)
        if  indent_dif > 0 and indent_dif <= 3 and next_nbl_case != 'AllUpper' and cur_case != 'AllUpper':

            # A gap of exactly 1 joins the two pieces without a separating space.
            if indent_dif == 1:
                two_line_data = cur_line_data.rstrip() + next_nbl_data.lstrip()
            else:
                two_line_data = cur_line_data.rstrip() + ' ' + next_nbl_data.lstrip()
            two_line_len = len(two_line_data.strip())


            print(index,line_no,cur_line_indent,next_nbl_indent,two_line_len)
            print(cur_line_data)
            print(next_nbl_data)


            if two_line_len < 150:

                print("merging lines")
                df['data'][index] = two_line_data
                case = get_case(two_line_data)
                df['case'][index] = case
    #             lcp = get_last_char_pos(two_line_data)
    #             df['last_character_placement'][index] = lcp
                print(line_no)
                audit_df['line_merged_with_next_line'][line_no] = 'Yes'
                print(two_line_data)
                audit_df['line_removed'][nnbl_line_no] = 'Yes'

#                 try:
#                     df['nlb'][next_nbl_index-1] = df['nlb'][next_nbl_index]
#                 except:
#                     pass

#                 try:
#                     df['plb'][next_nbl_index+1] = df['plb'][next_nbl_index]
#                 except:
#                     pass

                # Skip the absorbed row (and the blank row in between when nlb
                # is not 'N'). next() raises StopIteration if the iterator is
                # already exhausted.
                if nlb == 'N':
                    next(index_iter)

                else:
                    next(index_iter)
                    next(index_iter)


            else:
                print(cur_line_data)


        else:
            print(index,cur_line_indent,next_nbl_indent)
            try:
                print(cur_line_data)
            except:
                pass

            #newfile.write(cur_line_data)

def remove_space_between_words(df,audit_df):
    """Collapse runs of whitespace between words while keeping each line's indent.

    Rows whose line number is flagged ``line_removed == 'Yes'`` in
    ``audit_df`` and rows with 'ssc' above 140 are left untouched. For the
    others, 'data' is rebuilt as ``ssc`` leading spaces followed by every word
    plus one trailing space, 'lcp' is recomputed with ``get_last_char_pos``
    and, when the stripped text changed,
    ``audit_df['space_removed_between_characters']`` is set to 'Yes'.

    Both frames are modified in place and nothing is returned.

    Args:
        df: frame with 'ssc', 'line_no', 'data' and 'lcp' columns.
        audit_df: frame with 'line_removed' and
            'space_removed_between_characters' columns, addressed by the
            ``line_no`` value as index label.
    """
    # Set membership keeps the per-row "was this line removed?" test cheap.
    lines_removed = set(audit_df.loc[audit_df['line_removed'] == 'Yes'].index)

    for index in df.index:
        cur_indent = df.at[index, 'ssc']
        line_no = df.at[index, 'line_no']

        if (line_no in lines_removed) or cur_indent > 140:
            continue

        data = df.at[index, 'data']
        # Every word keeps one trailing space, including the last one.
        new_data = "".join(word + " " for word in data.split())
        new_data = new_data.rjust(len(new_data)+cur_indent)

        # Writes use .at on the frame itself: chained indexing
        # (frame[col][label] = value) may write to a temporary copy.
        df.at[index, 'data'] = new_data
        df.at[index, 'lcp'] = get_last_char_pos(new_data)
        if new_data.strip() != data.strip():
            audit_df.at[line_no, 'space_removed_between_characters'] = 'Yes'

        print(index)
        # Debug output only; printing must never abort the clean-up.
        try:
            print(data)
            print(new_data)
        except Exception:
            pass


def get_strict_conditions(csv_strict_conditions):
    """Load the strict-condition matrix and expose it under fixed column names.

    The CSV is read with its first row skipped and its first column as the
    index; only the first 30 rows are kept. Columns are picked by position,
    renamed, and returned in a fixed order.

    Args:
        csv_strict_conditions: path or buffer accepted by ``pandas.read_csv``.

    Returns:
        DataFrame with the 23 renamed condition columns.
    """
    # position in the sheet (after the index column) -> canonical name
    names_by_position = {
        3: 'cl_plb',
        4: 'cl_nlb',
        5: 'cl_ssc',
        6: 'cl_lcp',
        7: 'cl_par',
        8: 'cl_case',
        9: 'cl_per_uppercase',
        10: 'pnbl_plb',
        11: 'pnbl_par',
        12: 'pnbl_vs_cur_indent',
        15: 'pnbl_case',
        16: 'nnbl_nlb',
        17: 'nnbl_par',
        18: 'nnbl_vs_cur_indent',
        21: 'nnbl_case',
        22: 'pdil_plb',
        23: 'pdil_nlb',
        24: 'pdil_vs_cur_indent',
        27: 'pdil_par',
        29: 'ndil_plb',
        30: 'ndil_nlb',
        31: 'ndil_vs_cur_indent',
        34: 'ndil_par',
    }
    output_order = [
        'cl_plb', 'cl_nlb', 'cl_ssc', 'cl_lcp', 'cl_par', 'cl_case', 'cl_per_uppercase',
        'pnbl_plb', 'pnbl_par', 'pnbl_vs_cur_indent', 'pnbl_case',
        'nnbl_nlb', 'nnbl_par', 'nnbl_vs_cur_indent', 'nnbl_case',
        'pdil_plb', 'pdil_nlb', 'pdil_par', 'pdil_vs_cur_indent',
        'ndil_plb', 'ndil_nlb', 'ndil_par', 'ndil_vs_cur_indent',
    ]

    table = pd.read_csv(csv_strict_conditions, index_col = [0], skiprows = [0]).head(30)
    sheet_headers = table.columns
    renames = {sheet_headers[pos]: name for pos, name in names_by_position.items()}
    table = table.rename(columns=renames)
    return table[output_order]

def _sc_relative_indent(other_indent, cur_indent):
    """Classify ``other_indent`` against ``cur_indent`` as "More", "Same" or "Less"."""
    if other_indent > cur_indent:
        return "More"
    if other_indent == cur_indent:
        return "Same"
    return "Less"


def _sc_linked_row(df, link_col, index):
    """Return the index label of the row whose line_no equals ``df[link_col][index]``.

    Raises (KeyError / IndexError) when the column or the linked row is missing.
    """
    linked_line_no = df[link_col][index]
    return df.loc[df['line_no'] == linked_line_no, :].index.values[0]


def _sc_describe_row(df, row, cl_indent):
    """Collect the attributes of row ``row`` that a strict condition can test."""
    return {
        'plb': df['plb'][row],
        'nlb': df['nlb'][row],
        'par': df['parenthetical'][row],
        'case': str(df['case'][row]),
        'vs_cur_indent': _sc_relative_indent(df['ssc'][row], cl_indent),
    }


def _sc_optional_neighbor(df, link_col, index, cl_indent):
    """Describe the row linked through ``link_col``, or return None when there is none."""
    try:
        row = _sc_linked_row(df, link_col, index)
    except Exception:
        return None
    return _sc_describe_row(df, row, cl_indent)


def _sc_parse_range(text):
    """Parse "from-to" (or a single number) into a ``(low, high)`` pair.

    An unparseable lower bound becomes 200; a missing or unparseable upper
    bound becomes the lower bound.
    """
    parts = text.split('-')
    try:
        low = pd.to_numeric(parts[0])
    except Exception:
        low = 200
    try:
        high = pd.to_numeric(parts[1])
    except Exception:
        high = low
    return low, high


def _sc_parse_condition(conditions_df, ps):
    """Read condition row ``ps`` and pre-split its expected values into a dict."""
    def cell(col):
        return conditions_df[col][ps]

    ev = {}
    # Y/N/Maybe flags are compared as-is.
    for col in ('cl_plb', 'cl_nlb', 'pnbl_plb', 'nnbl_nlb',
                'pdil_plb', 'pdil_nlb', 'ndil_plb', 'ndil_nlb'):
        ev[col] = cell(col)

    ev['cl_indent_from'], ev['cl_indent_to'] = _sc_parse_range(cell('cl_ssc'))
    ev['cl_lcp_from'], ev['cl_lcp_to'] = _sc_parse_range(cell('cl_lcp'))

    # ';'-separated lists of accepted values.
    for col in ('cl_par', 'cl_case',
                'pnbl_vs_cur_indent', 'pnbl_par', 'pnbl_case',
                'nnbl_vs_cur_indent', 'nnbl_par', 'nnbl_case',
                'pdil_vs_cur_indent', 'pdil_par',
                'ndil_vs_cur_indent', 'ndil_par'):
        ev[col] = cell(col).split(";")

    # "<operator>;<integer>"; anything else disables the uppercase-percentage check.
    per_uppercase = cell('cl_per_uppercase').split(";")
    try:
        ev['operator'] = per_uppercase[0]
        ev['value'] = int(per_uppercase[1])
    except Exception:
        ev['operator'] = ""
        ev['value'] = ""
    return ev


def _sc_flag_matches(actual, expected):
    """A Y/N flag passes when it equals the expected value or the expectation is 'Maybe'."""
    return bool((actual == expected) or (expected == 'Maybe'))


def _sc_neighbor_checks(neighbor, ev, prefix, fields):
    """Return one boolean per field; a missing neighbour (None) passes every check."""
    if neighbor is None:
        return [True] * len(fields)
    checks = []
    for field in fields:
        expected = ev['{}_{}'.format(prefix, field)]
        if field in ('plb', 'nlb'):
            checks.append(_sc_flag_matches(neighbor[field], expected))
        else:
            checks.append(neighbor[field] in expected)
    return checks


def test_strict_conditions(df,csv_strict_conditions):
    """Identify lines that satisfy one of the strict conditions ps1..ps17.

    If no line has 'ssc' above 15, the text is treated as left aligned: only
    blank lines are marked identified and the strict conditions are skipped.

    Otherwise every line not yet identified is compared with the rows
    ``ps1`` .. ``ps17`` of the matrix returned by ``get_strict_conditions``.
    A condition tests the current line (plb, nlb, ssc range, lcp range,
    parenthetical, case, uppercase percentage) and four linked lines, found
    through the 'pnbl_line_no', 'nnbl_line_no', 'pdil_line_no' and
    'ndil_line_no' columns. The first condition whose checks all pass is
    written to 'Identification_Status'; 'isIdentified' becomes 'Yes' and
    'When_Identified' becomes 'FirstStrictConditions'. Finally all lines
    whose status is 'blank' are marked identified.

    ``df`` is modified in place and nothing is returned.

    Fixes relative to the previous implementation:
      * the "equal" uppercase-percentage operator is now enforced (its result
        used to be stored in a misspelled variable and ignored);
      * results are written with ``df.at`` instead of chained indexing;
      * each condition row is parsed once instead of once per line.

    Args:
        df: frame with 'ssc', 'lcp', 'plb', 'nlb', 'parenthetical', 'case',
            'per_uppercase', 'line_no', 'Identification_Status',
            'isIdentified' and the four ``*_line_no`` link columns.
        csv_strict_conditions: passed unchanged to ``get_strict_conditions``.
    """
    ## if conversion to text is left aligned then dialogue middle is wrongly
    ## identified as action middle, so the strict conditions are skipped
    left_aligned = not any(df['ssc'][index] > 15 for index in df.index)
    if left_aligned:
        df.loc[df['Identification_Status'] == 'blank','isIdentified'] = 'Yes'
        return

    conditions_df = get_strict_conditions(csv_strict_conditions)
    df['When_Identified'] = ''
    # Filled lazily, so a malformed condition row only raises once it is reached.
    parsed_conditions = {}

    for index in df.index:

        if df['isIdentified'][index] == 'Yes':
            continue

        cl_plb = df['plb'][index]
        cl_nlb = df['nlb'][index]
        cl_indent = pd.to_numeric(df['ssc'][index])
        cl_lcp = df['lcp'][index]
        cl_par = df['parenthetical'][index]
        cl_case = str(df['case'][index])
        cl_per_uppercase = df['per_uppercase'][index]

        pnbl = _sc_optional_neighbor(df, 'pnbl_line_no', index, cl_indent)

        # Unlike the other three links, a missing next non-blank line does NOT
        # pass automatically: its checks run against these placeholder values
        # (this mirrors the previous 'afterlast' behaviour).
        try:
            nnbl = _sc_describe_row(df, _sc_linked_row(df, 'nnbl_line_no', index), cl_indent)
        except Exception:
            nnbl = {'plb': None, 'nlb': None, 'par': None, 'case': None, 'vs_cur_indent': "NA"}

        pdil = _sc_optional_neighbor(df, 'pdil_line_no', index, cl_indent)
        ndil = _sc_optional_neighbor(df, 'ndil_line_no', index, cl_indent)

        #for j in range(1,32):
        for j in range(1,18):

            if j in [23,24,32,33]:
                continue

            ps = "ps{0}".format(j)
            if ps not in parsed_conditions:
                parsed_conditions[ps] = _sc_parse_condition(conditions_df, ps)
            ev = parsed_conditions[ps]

            ## checks on the linked lines
            pnbl_checks = _sc_neighbor_checks(pnbl, ev, 'pnbl', ('plb', 'vs_cur_indent', 'par', 'case'))
            nnbl_checks = _sc_neighbor_checks(nnbl, ev, 'nnbl', ('nlb', 'vs_cur_indent', 'par', 'case'))
            pdil_checks = _sc_neighbor_checks(pdil, ev, 'pdil', ('plb', 'nlb', 'vs_cur_indent', 'par'))
            ndil_checks = _sc_neighbor_checks(ndil, ev, 'ndil', ('plb', 'nlb', 'vs_cur_indent', 'par'))

            ## checks on the current line
            cl_plb_check = _sc_flag_matches(cl_plb, ev['cl_plb'])
            cl_nlb_check = _sc_flag_matches(cl_nlb, ev['cl_nlb'])
            cl_indent_check = bool((cl_indent >= ev['cl_indent_from']) and (cl_indent <= ev['cl_indent_to']))
            cl_lcp_check = bool((cl_lcp >= ev['cl_lcp_from']) and (cl_lcp <= ev['cl_lcp_to']))
            cl_par_check = cl_par in ev['cl_par']

            if j == 21 and cl_case in ev['cl_case']:
                # ps21 additionally requires the last word to be upper case
                # (not reachable while the loop stops at ps17).
                last_word = df['data'][index].split()[-1]
                cl_case_check = last_word == last_word.upper()
            else:
                cl_case_check = cl_case in ev['cl_case']

            ## cl percentage upper checks
            operator = ev['operator']
            value = ev['value']
            cl_per_uppercase_check = True
            if operator == "lessthan":
                cl_per_uppercase_check = bool(cl_per_uppercase <= value)
            elif operator == "morethan":
                cl_per_uppercase_check = bool(cl_per_uppercase >= value)
            elif operator == "equal":
                cl_per_uppercase_check = bool(cl_per_uppercase == value)

            checklist = [cl_plb_check, cl_nlb_check, cl_indent_check, cl_lcp_check,
                         cl_par_check, cl_case_check, cl_per_uppercase_check]
            checklist += pnbl_checks + nnbl_checks + pdil_checks + ndil_checks

            if all(checklist):
                # .at writes to the frame itself; chained indexing may write
                # to a temporary copy.
                df.at[index, 'Identification_Status'] = ps
                df.at[index, 'isIdentified'] = 'Yes'
                df.at[index, 'When_Identified'] = 'FirstStrictConditions'
                break


    df.loc[df['Identification_Status'] == 'blank','isIdentified'] = 'Yes'


def _interpolate_anchor_weights(wts_df, prefix, anchors):
    """Merge linearly interpolated ``<prefix>_<n>`` columns into ``wts_df``.

    The columns ``<prefix>_<a>`` for every ``a`` in ``anchors`` must already
    exist. One column is produced for every integer between the smallest and
    the largest anchor; anchors keep their own value, the positions in
    between are interpolated row-wise and rounded. Columns that already exist
    in ``wts_df`` are kept and the interpolated duplicates get a ``_y`` suffix.
    """
    anchor_cols = ['{}_{}'.format(prefix, a) for a in anchors]
    sub = wts_df.loc[:, ['Possibilities'] + anchor_cols].set_index('Possibilities')

    # reindex adds the missing in-between columns as NaN without touching the
    # anchor columns, already in ascending position order.
    all_cols = ['{}_{}'.format(prefix, pos) for pos in range(min(anchors), max(anchors) + 1)]
    sub = sub.reindex(columns=all_cols)

    sub = sub.interpolate(axis = 1).apply(round).reset_index()
    return wts_df.merge(sub, how ='inner', on = ['Possibilities'], suffixes=('','_y'))


def prep_weights_csv (weights_csv) :
    """Load the weights sheet and add per-position 'ssc_N' / 'lcp_N' weights.

    The CSV is read with its first row skipped and limited to 50 rows. Most
    columns are addressed by position and renamed; the 'ssc_*', 'ssc_gt_65'
    and 'lcp_*' columns must already carry those names in the sheet.

    Weights for starting-space counts 15..65 are interpolated between the
    anchors ssc_15/25/30/35/55/65, and weights for last-character positions
    30..75 between lcp_30/35/49/59/72/75, where lcp_30 and lcp_75 are fixed
    to 1.

    Fix: the ssc_55 anchor used to be overwritten with NaN before the
    interpolation, so indents 36..64 were interpolated directly between
    ssc_35 and ssc_65. It is now honoured like every other anchor.

    Args:
        weights_csv: path or buffer accepted by ``pandas.read_csv``.

    Returns:
        DataFrame indexed by 'Possibilities'.
    """
    wts_df = pd.read_csv(weights_csv,skiprows=[0])
    wts_df = wts_df.head(50)

    # sheet position -> canonical column name
    names_by_position = {
        1: 'Possibilities', 2: 'Description', 3: 'PureImpure',
        7: 'AllUpper', 8: 'AllLower', 9: 'FirstCamel', 10: 'FirstUpper',
        11: 'FirstLowerSomeUpper', 12: 'Partial', 13: 'EntireLine', 14: 'PartofLine',
        15: 'only left parenthetical present', 16: 'only right parenthetical present',
        17: 'PLB_Yes', 18: 'PLB_No', 19: 'NLB_Yes', 20: 'NLB_No',
        21: '<15withNumeric',
        29: '<15withoutNumeric',
        34: 'cur_indent_equals_pnbl', 35: 'cur_indent_equals_nnbl',
        36: 'containsSpecialWords1', 37: 'containsSpecialWords2',
        38: 'containsSpecialWords3', 39: 'containsSpecialWords4',
    }
    sheet_headers = wts_df.columns
    wts_df = wts_df.rename(columns={sheet_headers[pos]: name
                                    for pos, name in names_by_position.items()})

    wts_df = wts_df.loc[:,['Possibilities', 'Description', 'PureImpure','AllUpper','AllLower','FirstCamel','FirstUpper', 'FirstLowerSomeUpper',
            'Partial','EntireLine', 'PartofLine',
           'only left parenthetical present', 'only right parenthetical present',
           'PLB_Yes', 'PLB_No', 'NLB_Yes', 'NLB_No', '<15withNumeric',
           'ssc_15', 'ssc_25', 'ssc_30', 'ssc_35','ssc_55','ssc_65','ssc_gt_65', '<15withoutNumeric','lcp_35','lcp_49','lcp_59','lcp_72','cur_indent_equals_pnbl','cur_indent_equals_nnbl','containsSpecialWords1','containsSpecialWords2','containsSpecialWords3','containsSpecialWords4']]

    ## interpolate the in between weights for the starting space count
    wts_df = _interpolate_anchor_weights(wts_df, 'ssc', [15, 25, 30, 35, 55, 65])

    ## interpolate the in between weights for the last character placement
    wts_df['lcp_30'] = 1
    wts_df['lcp_75'] = 1
    wts_df = _interpolate_anchor_weights(wts_df, 'lcp', [30, 35, 49, 59, 72, 75])

    wts_df.set_index('Possibilities',inplace =True)
    return wts_df

def give_largest(df, n):
    """Return the ``n`` largest values of a Series, largest first.

    The result is a new Series labelled '1_largest', '2_largest', ... with
    one entry per value returned by ``nlargest``.
    """
    top = df.nlargest(n)
    labels = []
    for rank in range(len(top)):
        labels.append(f'{rank + 1}_largest')
    return pd.Series(list(top), index=labels)


def n_largest(df, axis, n):
    '''
    Apply ``give_largest`` along ``axis`` of the input DataFrame and
    return the n largest values of each column/row, ranked
    '1_largest', '2_largest', ...
    '''
    def _ranked(values):
        # one column or row at a time, depending on axis
        return give_largest(values, n)

    return df.apply(_ranked, axis=axis)

def update_parenthetical_neighbor_wt(df):
    """Boost the weights of the lines around a parenthetical.

    * For a 'StartingLeft' or 'Complete' parenthetical, ``ps7`` of the line
      before it gets +10 (one row up when 'plb' is 'N', otherwise two rows up).
    * For an 'EndingRight' or 'Complete' parenthetical, ``ps13`` and ``ps15``
      of the line after it get +15 (one row down when 'nlb' is 'N', otherwise
      two rows down).

    Neighbours that fall outside the frame are silently skipped.

    Args:
        df: frame with 'parenthetical', 'plb', 'nlb', 'ps7', 'ps13' and 'ps15'
            columns; neighbours are addressed as ``index - 1`` / ``index + 1``
            etc., so integer index labels are assumed.

    Returns:
        The same ``df`` object, modified in place.
    """
    # Exceptions raised by a neighbour that does not exist (or by label
    # arithmetic on a non-integer index); these updates are best-effort.
    skippable = (KeyError, IndexError, TypeError)

    print("updating weghts of parenthetical neighbors")
    for index in df.index:
        par = df.at[index, 'parenthetical']
        if par == 'Absent':
            continue

        ## line before StartingLeft or Complete
        if par in ('StartingLeft','Complete'):
            print(index,par)
            try:
                target = index - 1 if df.at[index, 'plb'] == 'N' else index - 2
                # .at updates the frame itself; chained indexing
                # (df[col][label] += x) may update a temporary copy.
                df.at[target, 'ps7'] += 10
            except skippable:
                pass

        ## line after Complete or EndingRight
        if par in ('EndingRight','Complete'):
            print(index,par)
            try:
                target = index + 1 if df.at[index, 'nlb'] == 'N' else index + 2
                df.at[target, 'ps13'] += 15
                df.at[target, 'ps15'] += 15
            except skippable:
                pass

    return df

def gen_pos_weights(df,weights_csv):
    """Score every line of ``df`` against each possibility ps1..ps31.

    The weight table comes from ``prep_weights_csv(weights_csv)``. Columns
    'ps1'..'ps31' are created in ``df`` and, per line, accumulate weights for:
    case, starting space count ('ssc_<n>'), the '<15withNumeric' /
    '<15withoutNumeric' rules, 'ssc_gt_65', last character placement
    ('lcp_<n>'), plb / nlb, parentheses, and equal indent with the previous /
    next non-blank line. Afterwards bonuses are added for transition words or
    fully quoted lines ('containsSpecialWords1'), slug prefixes
    ('containsSpecialWords3'), slug endings ('containsSpecialWords4'), a +30
    speaker boost on ps7 for short all-caps lines in left-aligned scripts, and
    any 'preassigned_weights'. Finally ``update_parenthetical_neighbor_wt`` is
    applied and an empty 'actual_element' column is added if missing.

    NOTE(review): all cell updates use chained indexing
    (``df[col][index] += ...``); pandas documents this as unreliable, and
    under Copy-on-Write it does not modify ``df``. Confirm against the pandas
    version in use.

    Args:
        df: frame with 'line_no', 'data', 'plb', 'nlb', 'ssc', 'lcp', 'case',
            'pnbl_line_no', 'nnbl_line_no' and 'preassigned_weights' columns.
        weights_csv: passed unchanged to ``prep_weights_csv``.

    Returns:
        ``df`` with the weight columns filled in.
    """

    # prep weights csv
    wts_df = prep_weights_csv(weights_csv)

    # NOTE(review): these function-local imports shadow the module-level ones;
    # os, csv, sys, Path, argparse, np, pd and math are not used below.
    import os
    import csv
    import re

    #mport global_file_db
    import sys
    from pathlib import Path
    import argparse
    import numpy as np
    import pandas as pd
    import math

    left_aligned = True
    contains_special = False

    # The script counts as left aligned when no line starts beyond column 16.
    for index in df.index:
        if df['ssc'][index] > 16:
            left_aligned = False
            break
    print("is script left aligned: ",left_aligned)

    for i in range(1,32):

        # NOTE(review): i is an int and the tuple holds strings, so this test
        # is never true and no possibility is skipped (same for the identical
        # tests further down).
        if i in ('23','24','32','33'):
            continue
        df["ps{0}".format(i)] = 0

    for index in df.index:

        line_no = df['line_no'][index]
        data = df['data'][index]
        plb = df['plb'][index]
        nlb = df['nlb'][index]
        contains_special = False
        print("processing weight for ",line_no)

#         if index == 0:
#             pnbl_index = 'first'
#         elif df['plb'][index] == 'N' :
#             pnbl_index = index -1
#         elif index - 1 == 0:
#             pnbl_index = 'first'
#         else:
#             pnbl_index = index -2

#         if index == df.index[-1]:
#             nnbl_index = 'last'
#         elif df['nlb'][index] == 'N' :
#             nnbl_index = index + 1
#         elif index+1 == df.index[-1]:
#             nnbl_index = 'last'
#         else:
#             nnbl_index = index + 2

        pnbl_index = False
        nnbl_index = False

        # Resolve the previous / next non-blank line through its line number;
        # False marks "not found".
        try:
            pnbl_line_no = df['pnbl_line_no'][index]
            pnbl_index = df.loc[df['line_no'] == pnbl_line_no,:].index.values[0]
        except:
            pnbl_index = False

        try:
            nnbl_line_no = df['nnbl_line_no'][index]
            nnbl_index = df.loc[df['line_no'] == nnbl_line_no,:].index.values[0]
        except:
            nnbl_index = False


        # NOTE(review): when the lookup above failed, df['ssc'][False] is
        # attempted here; whether that raises (giving -1) or resolves to
        # label 0 depends on pandas' handling of boolean keys - verify.
        try:
            pnbl_indent = df['ssc'][pnbl_index]
        except:
            pnbl_indent = -1
        try:
            nnbl_indent = df['ssc'][nnbl_index]
        except:
            nnbl_indent = -1

        cur_indent = df['ssc'][index]
        # Name of the weight column for this starting space count.
        ssc_col = 'ssc_' + str(cur_indent)
        print(ssc_col)

        case = df['case'][index]
        try:
            print("processing line no",line_no, data)
        except:
            pass

        print(plb)
        print(nlb)
        print(pnbl_indent)
        print(nnbl_indent)

        ### weights to be assigned based on space count, case, parenthesis and plb/nlb


        # read the weights csv
        #wts_df = pd.read_csv('weights1.csv',index_col = 'Possibilities')
        ## weights for the in-between last character placements come from the
        ## interpolated lcp_<n> columns of wts_df

        lcp = df['lcp'][index]
        #print("lcp ",lcp)
        # Name of the weight column for this last character placement; falls
        # back to the raw value when it cannot be converted to int.
        try:
            lcp_col = "lcp_" + str(int(lcp))
        except:
            lcp_col = "lcp_" + str(lcp)

        #print(wts_df.head(0))
        # make space dict for getting relevant space columns for weights
        # (not referenced anywhere below)
        sp_bin_dict = {1:'0-14',2:'15',3:'16-24',4:'25',5:'26-29',6:'30',7:'31-34',8:'35',9:'36-73',10:'74onwards'
                  }
        #loop over for  the possibilities

        for i in range(1,32):

            if i in ('23','24','32','33'):
                continue

            df["ps{0}".format(i)][index] = 0

            ## get weights for the case
            # EndUpper / MidUpper share the FirstLowerSomeUpper weights; the
            # remapped value also stays in effect after this loop.
            if case in ('EndUpper','MidUpper'):
                case = 'FirstLowerSomeUpper'

            if case != 'None':
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),case]


            ## get weights based on the starting space count
            # Indents without an ssc_<n> column contribute nothing.
            try:
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),ssc_col]
                #print("starting wight code was here")
            except:
                pass

            print("ps{0}".format(i),df["ps{0}".format(i)][index])
            ## get weights for <15 with Numeric character or <15 without Numeric character
            # start_num: the first non-space character of the line is a digit.
            try:
                start_num = True if re.search('[0-9]',data.strip()[0]) else False
            except:
                start_num = False
            pos_num = re.search('[0-9]',data)
            if (pos_num!= None) and start_num and cur_indent<15:
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'<15withNumeric']
            elif check_space(data)<15:
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'<15withoutNumeric']

            if cur_indent>65:
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'ssc_gt_65']
            ## get weights based on the last character placement

            print("ps{0}".format(i),df["ps{0}".format(i)][index])

            # Placements without an lcp_<n> column only log the exception.
            try:
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),lcp_col]
                print("code was here")
                print(wts_df.loc["ps{0}".format(i),lcp_col])
            except Exception as e:
                print ("lcp exception is",e)
                pass

            #print("ps{0}".format(i),df["ps{0}".format(i)][index])

            # Calculation of weights based on plb and nlb(L-O column in sheet)
            if plb == "Y":
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'PLB_Yes']
            if plb == "N":
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'PLB_No']
            if nlb == "Y":
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'NLB_Yes']
            if nlb == "N":
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'NLB_No']

            #print("ps{0}".format(i),df["ps{0}".format(i)][index])

            # Calculation of weights based on parenthesis(H-K column in sheet)
            # EntireLine: the stripped line starts with '(' and ends with ')';
            # PartofLine: both brackets occur somewhere in the line.
            if re.match('\(',data.strip()[:1]) and re.match('\)',data.strip()[-1:]) :
               # print('EntireLine')
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'EntireLine']
            elif re.search('\(',data.strip()) and  re.search('\)',data.strip())   :
                #print('PartofLine')
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'PartofLine']
            elif re.search('\(',data.strip()) and  not(re.search('\)',data.strip()))   :
                #print('only left parenthetical present')
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'only left parenthetical present']
            elif not(re.search('\(',data.strip())) and  re.search('\)',data.strip())   :
                #print('only right parenthetical present')
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'only right parenthetical present']

            #df["ps{0}".format(i)][index] = math.trunc(df["ps{0}".format(i)][index])
            #print("i is ",i)
            #print(math.trunc(ps_dict["ps{0}".format(i)]))

            ## Calculation of weights based on indent equals previous / next non blank line
            if cur_indent == pnbl_indent:
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'cur_indent_equals_pnbl']

            if cur_indent == nnbl_indent:
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'cur_indent_equals_nnbl']

            print("ps{0}".format(i),df["ps{0}".format(i)][index])

        print("Special Words Check")
        ## calculation of weights based on special words
        # re.match anchors at the start, so a word only counts when the
        # stripped, colon-free line begins with it (case-insensitive).
        sp_words1 = ['cut to','CUT BACK TO','FLASHCUT TO','dissolve to', 'intercut', 'Inter Cut','PBS', 'interval',
         'Flashback','FADE IN','FADE TO BLACK']
        for sp_word in sp_words1:
            print(sp_word)
            search_data =  data.replace(":","")
            match = re.match(sp_word,search_data.strip(),re.IGNORECASE)
            if match:
                contains_special = True


                break
        print (contains_special,search_data)
        if not contains_special:
            search_data = data.strip()
            ## check if within quotes
            # A line longer than 3 characters that starts and ends with a
            # (straight or curly) double quote is treated like a special word.
            if search_data:
                if len(search_data) > 3:
                    if (search_data.startswith('“') or search_data.startswith('"')):
                        if (search_data.endswith('”') or search_data.endswith('"')):
                            contains_special = True

        if contains_special:
            try:
                print("found match in ",data)
            except:
                print("found match ")
            for i in range(1,32):
                if i in ('23','24','32','33'):
                    continue
                df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'containsSpecialWords1']


        ## calculation of weights based on special slug words
        # NOTE(review): the words are used as regex patterns without escaping,
        # so the '.' in 'INT.' / 'EXT.' matches any character (e.g. "INTO").
        # Only the first 8 characters of the stripped line are searched.
        sp_words3 = ['INT.','EXT.','I/E','E/I','EXT-','INT-']
        if not contains_special:
            for sp_word in sp_words3:
                print(sp_word)
                #search_data =  data.replace(":","")
                found = re.search(sp_word,data.strip()[0:8])
                if found:
                    contains_special = True
                    try:
                        print("found match in ",data)
                    except:
                        print("found match ")
                    for i in range(1,32):
                        if i in ('23','24','32','33'):
                            continue
                        df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'containsSpecialWords3']
                    break


        ## calculation of weights based on special slug endings
        # Unlike the two checks above, this one also runs when the line was
        # already flagged as special. The endings are searched anywhere in the
        # line and are case-sensitive.
        sp_words4 = [' - MORNING',' - DAY',' - EVENING',' - EVE',' - NIGHT',' - LATER',' - AFTERNOON']

        for sp_word in sp_words4:
            found = re.search(sp_word,data.strip())
            if found:
                contains_special = True
                try:
                    print("found match in ",data)
                except:
                    print("found match ")
                for i in range(1,32):
                    if i in ('23','24','32','33'):
                        continue
                    df["ps{0}".format(i)][index] += wts_df.loc["ps{0}".format(i),'containsSpecialWords4']
                break


        # speaker possible if single all caps word
        # (at most two words, no '.' or ':', and lcp below 30)
        if left_aligned and not contains_special:
            if case == 'AllUpper' and len(data.split()) <= 2 and "." not in data and ":" not in data and df['lcp'][index] < 30 :
                print("boosting speaker possibility")
                df["ps7"][index] += 30

        ## add preassigned weight
        # 'preassigned_weights' is parsed as ';'-separated "<column>-<integer>"
        # entries, e.g. "ps7-20;ps1-5".
        if not contains_special:
            if df['preassigned_weights'][index]:      
                pre_psw_list = df['preassigned_weights'][index].split(';')          
                for psw in pre_psw_list:
                    ps = psw.split('-')[0]
                    wt = psw.split('-')[1]
                    df[ps][index] += int(wt)


    df = update_parenthetical_neighbor_wt(df)

    if 'actual_element' not in df.columns:
        df['actual_element'] = ''

    return df

def sort_pos_decr_wts(df):
    """Rank each line's possibility weights in decreasing order.

    For every row, the 28 tracked ``ps*`` weight columns are ranked. The
    k-th ranked column name (from a descending argsort of the row) is joined
    with the value of the ``<k>_largest`` column returned by ``n_largest``
    into a ``"<ps>-<weight>"`` string and stored in ``first_largest`` ...
    ``teight_largest``.

    Side effect: the ``1_largest`` ... ``28_largest`` columns are inserted
    into the caller's ``df`` at column positions 8..35.

    Args:
        df: line-level DataFrame holding the ``ps*`` weight columns.

    Returns:
        A new DataFrame re-indexed to the fixed column layout below. The
        intermediate ``Top*`` / ``*_largest`` helper columns are dropped by
        the reindex.
    """
    ps_cols = ['ps1', 'ps2', 'ps3', 'ps4', 'ps5', 'ps6', 'ps7', 'ps8', 'ps9', 'ps10',
               'ps11', 'ps12', 'ps13', 'ps14', 'ps15', 'ps16', 'ps17', 'ps18', 'ps19',
               'ps21', 'ps22', 'ps25', 'ps26', 'ps27', 'ps28', 'ps29', 'ps30', 'ps31']
    rank_count = len(ps_cols)  # 28

    # Output column prefixes, one per rank. The spellings (including the
    # misspelt ones) are the established column names, so they are kept.
    rank_labels = ['first', 'second', 'third', 'fourth', 'fifth', 'sixth', 'seventh',
                   'eight', 'ninth', 'tenth', 'eleventh', 'twelth', 'thirteenth',
                   'fourteenth', 'fifteenth', 'sixteenth', 'seventeenth', 'eighteenth',
                   'ninteenth', 'tewenty', 'tone', 'ttwo', 'tthree', 'tfour', 'tfive',
                   'tsix', 'tseven', 'teight']

    ## sort in decreasing order
    largest = n_largest(df[ps_cols], axis=1, n=rank_count)

    # "1_largest" goes to position 8, "2_largest" to 9, ... "28_largest" to 35.
    for rank in range(1, rank_count + 1):
        value_col = "{0}_largest".format(rank)
        df.insert(7 + rank, value_col, largest[value_col])

    b = df[ps_cols]
    top_cols = ["Top{0}".format(rank) for rank in range(1, rank_count + 1)]
    ranked_names = b.apply(
        lambda row: list(b.columns[np.array(row).argsort()[::-1][:rank_count]]),
        axis=1,
    ).to_list()
    # Build Tops on df's own index: with a default RangeIndex the axis=1
    # concat below would misalign rows whenever df is not 0..n-1 indexed.
    Tops = pd.DataFrame(ranked_names, columns=top_cols, index=b.index)
    print(Tops)
    res = pd.concat([df, Tops], axis=1)

    for rank, label in enumerate(rank_labels, start=1):
        res[label + '_largest'] = (
            res["Top{0}".format(rank)] + "-" + res["{0}_largest".format(rank)].astype(str)
        )

    leading_cols = ['line_no',
                    'data',
                    'actual_element',
                    'Identification_Status',
                    'scene_number',
                    'plb',
                    'nlb',
                    'ssc',
                    'lcp',
                    'case',
                    'parenthetical',
                    'isIdentified',
                    'When_Identified']
    trailing_cols = ['pnbl_line_no',
                     'nnbl_line_no',
                     'ppnbl_line_no',
                     'nnnbl_line_no',
                     'pdil_line_no',
                     'ndil_line_no']

    y = (leading_cols
         + [label + '_largest' for label in rank_labels]
         + ps_cols
         + trailing_cols)

    df = res.reindex(columns=y)

    return df


def prep_for_pos_elimination(df):
    """Build the weight-ordered possibility lists used by the elimination passes.

    Works in place on ``df``; nothing is returned. Steps, in order:

    1. Insert the ``Identification_Status_with_weights`` column at position 12.
    2. For every line with ``isIdentified == 'No'``, rank all candidate
       possibilities (ps1..ps31 except ps23/ps24) by their weight column and
       store the ranking in ``Identification_Status_with_weights``
       (``"ps-weight"`` items) and ``Identification_Status`` (names only).
    3. Fill ``ps_not_to_remove`` with the possibilities later passes must
       keep: the top-weight possibilities (including ties) plus a few chosen
       from the shape of the line text.
    4. Drop parenthetical-only possibilities from lines whose
       ``parenthetical`` value rules them out.
    5. Restrict the first and last (non-'None' case) lines to the
       possibilities allowed at the start / end of a script.
    6. Re-rank every line that still has more than one possibility.

    Args:
        df: line-level DataFrame with at least 12 columns, including
            ``data``, ``case``, ``parenthetical``, ``isIdentified``,
            ``Identification_Status``, ``line_no`` and the ``ps*`` weights.
            The first/last-line step looks rows up by label starting at 0,
            so it expects a 0..n-1 integer index.
    """
    skipped = ('ps23', 'ps24', 'ps32', 'ps33', 'ps34')
    all_pos = [ps for ps in ("ps{0}".format(n) for n in range(1, 35))
               if ps not in skipped]
    df.insert(12, 'Identification_Status_with_weights', '')

    def weight_of(entry):
        # maxsplit=1 keeps the sign of a negative weight ("ps4--3" -> -3);
        # a plain split("-") would yield '' and make int() raise.
        return int(entry.split("-", 1)[1])

    def rank_and_store(index, candidates):
        # Rank `candidates` for one row by decreasing weight and write both
        # the weighted and the plain ranking back to df.
        entries = []
        for pos in candidates:
            try:
                weight = int(df.at[index, pos])
            except (KeyError, TypeError, ValueError, OverflowError):
                # No such weight column (e.g. ps20) or no usable number.
                continue
            entries.append("{0}-{1}".format(pos, weight))

        # Stable sort: equal weights keep the order of `candidates`.
        entries.sort(key=weight_of, reverse=True)
        df.at[index, 'Identification_Status_with_weights'] = ';'.join(entries)

        ordered = ';'.join(entry.split("-", 1)[0] for entry in entries)
        print(ordered)
        df.at[index, 'Identification_Status'] = ordered

    def drop_pos(index, unwanted):
        # Remove `unwanted` possibilities from both per-line lists of a row.
        for column in ('Identification_Status', 'ps_not_to_remove'):
            kept = [pos for pos in df.at[index, column].split(";")
                    if pos not in unwanted]
            df.at[index, column] = ";".join(kept)

    ## unidentified lines start with every possibility, ordered by weight
    for index in df.index:
        if df.at[index, "isIdentified"] == 'No':
            rank_and_store(index, all_pos)

    ## make a column which indicates the possibilities not to be removed
    df['ps_not_to_remove'] = 'ps34'

    for index in df.index:
        if df.at[index, "isIdentified"] != 'No':
            continue

        data = df.at[index, 'data']
        stripped = data.strip()

        ## find the top possibilities - max weight, including ties
        ranked_with_wts = df.at[index, "Identification_Status_with_weights"].split(";")
        max_pos_index = 0
        print(df.at[index, "line_no"])
        for k in range(len(ranked_with_wts) - 1):
            wt1 = ranked_with_wts[k].split("-", 1)[1]
            wt2 = ranked_with_wts[k + 1].split("-", 1)[1]
            print(wt1, wt2)
            if wt2 != wt1:
                break
            max_pos_index = k + 1

        print(max_pos_index)
        ranked = df.at[index, "Identification_Status"].split(";")
        pos_not_to_remove = ranked[:max_pos_index + 1]

        # if entire line in parenthetical don't remove ps8,ps10
        if stripped.startswith('(') and stripped.endswith(')'):
            pos_not_to_remove.extend(['ps8', 'ps10'])

        words = data.split()
        # `words` is empty for a blank line; guard before looking at words[0].
        if words and len(words[0]) > 1 and words[0].isupper():
            pos_not_to_remove.extend(['ps8', 'ps25', 'ps30'])

        if len(words) == 1:
            pos_not_to_remove.append('ps7')

        # a "( ... )" pair that opens after the start of the line
        if '(' in data and ')' in data and not stripped.startswith('('):
            if data.find(')') - data.find('(') > 0:
                pos_not_to_remove.extend(['ps25', 'ps27'])

        # De-duplicate while keeping insertion order, so the stored string
        # does not depend on set iteration order (hash seed).
        pos_not_to_remove = list(dict.fromkeys(pos_not_to_remove))
        df.at[index, 'ps_not_to_remove'] = ";".join(str(elem) for elem in pos_not_to_remove)

    ## go through all the lines: if parenthetical is absent remove
    ## ps10,11,12,25,26,27; if it is only partial remove ps11,12
    absent_remove = ['ps10', 'ps11', 'ps12', 'ps25', 'ps26', 'ps27']
    partial_remove = ['ps11', 'ps12']

    for index in df.index:
        par = df.at[index, 'parenthetical']
        if par == 'Absent':
            drop_pos(index, absent_remove)
        elif par in ('PartMidEnd', 'PartStartMid', 'PartMidMid'):
            drop_pos(index, partial_remove)

    if len(df.index) == 0:
        # Nothing to refine; avoids the index lookups below failing.
        return

    ## refine the possibilities of first and last line
    first_line_index = 0
    if df.at[first_line_index, 'case'] == 'None':
        first_line_index += 1

    last_line_index = df.index[-1]
    if df.at[last_line_index, 'case'] == 'None':
        last_line_index -= 1

    ## keep possibilities of first line (in the line's own ranking order)
    eligible_pos = ['ps1', 'ps2', 'ps17', 'ps18']
    first_line_pos = df.at[first_line_index, "Identification_Status"].split(";")
    first_line_pos = [ps for ps in first_line_pos if ps in eligible_pos]
    df.at[first_line_index, 'Identification_Status'] = ";".join(first_line_pos)
    df.at[first_line_index, 'ps_not_to_remove'] = ""

    ## keep possibilities of last line (in the order of the eligible list)
    eligible_pos = ['ps6', 'ps15', 'ps16', 'ps17', 'ps29', 'ps30', 'ps31']
    last_line_pos = df.at[last_line_index, "Identification_Status"].split(";")
    last_line_pos = [ps for ps in eligible_pos if ps in last_line_pos]
    df.at[last_line_index, 'Identification_Status'] = ";".join(last_line_pos)
    df.at[last_line_index, 'ps_not_to_remove'] = ""

    ## re-rank every line that still has more than one possibility
    for index in df.index:
        cur_line_pos = df.at[index, "Identification_Status"].split(";")
        if len(cur_line_pos) == 1:
            continue
        rank_and_store(index, cur_line_pos)


def examine_speaker_pos(df,audit_df):
    """Use the already-identified speaker lines to find and normalise others.

    Steps, in the order the code performs them:

    1. Collect the stripped text of every line whose Identification_Status
       is exactly 'ps7' (``unique_speaker_list``) and the line numbers of all
       'ps7'/'ps8' lines (``speaker_lines_list``).
    2. Scan the other lines: a line whose upper-cased text (with or without
       its internal whitespace) equals a known speaker is queued as a speaker
       line; a line that equals a known speaker only once it is joined with
       the following line is queued in ``speaker_in_two_lines_list``.
    3. Repair the two-line speakers: either split a trailing parenthetical
       onto the next line, or merge the two lines and drop the second one.
    4. Mark every queued speaker line as 'ps7', upper-case it and, when the
       first "word" is a single character, remove the gaps between letters.
    5. For the remaining lines that contain a known speaker name, either
       split "SPEAKER (parenthetical)" into two rows or narrow the line's
       possibility list.
    6. Recompute ``isIdentified`` for every line.

    Args:
        df: line-level DataFrame (reads/writes 'data', 'line_no', 'case',
            'ssc', 'plb', 'nlb', 'parenthetical', 'Identification_Status',
            'When_Identified', 'isIdentified', 'ps_not_to_remove').
        audit_df: audit DataFrame, modified in place. It is indexed here with
            line numbers - presumably its index is line_no; confirm against
            the caller. Columns written: 'line_removed', 'case_corrected',
            'space_removed_between_characters',
            'line_broken_into_multiple_lines'.

    Returns:
        The updated line DataFrame (a new object, re-indexed 0..n-1).
    """

    print("examining speaker possibilties")
    # Known speaker names: text of the lines identified as exactly 'ps7'.
    speaker_list = df.loc[df['Identification_Status'] == 'ps7','data'].astype(str)
    speaker_list = [ elem.strip() for elem in speaker_list ]
    # Line numbers of lines already settled as speaker ('ps7') or 'ps8'.
    speaker_lines_list = df.loc[(df['Identification_Status'] == 'ps7') | (df['Identification_Status'] == 'ps8'),'line_no'].to_list()
    unique_speaker_list = []
    speaker_in_two_lines_list = []
    # De-duplicate the speaker names, keeping first-seen order.
    for speaker in speaker_list:
        speaker = speaker.strip()
        #print(speaker)
        if speaker not in unique_speaker_list:
            unique_speaker_list.append(speaker)
            ## strip the blank spaces

    # Debug output; the try/except only protects the print call itself.
    try:
        print(unique_speaker_list)
    except:
        pass
    print(speaker_lines_list)


    # In[173]:


    # Pass over every line that is not yet 'ps7'/'ps8' to queue new speaker lines.
    for index in df.index:
        line_no = df['line_no'][index]
        data = df['data'][index].strip()
        if df['Identification_Status'][index] == 'ps7':
              continue
        if df['Identification_Status'][index] == 'ps8':
              continue


        # The line text is upper-cased before the comparison; the entries of
        # unique_speaker_list are compared as stored (not upper-cased here).
        try:
            if ("".join(data.split()).upper() in unique_speaker_list) or (data.upper() in unique_speaker_list):
                print (line_no,data)
                if line_no not in speaker_lines_list and df['isIdentified'][index] != 'Yes':
                    speaker_lines_list.append(line_no)
        except:
            print(line_no,data,"data is not str")
            pass


        # Speaker name broken across this line and the next one.
        # Uses index+1 as "the next row", so it relies on consecutive integer
        # index labels.
        if index != df.index[-1]:
            nl_data = df['data'][index+1]
            cur_par = df['parenthetical'][index]
            if data.strip() and nl_data.strip() and cur_par == 'Absent':
                # Both lines joined with all whitespace removed (case unchanged).
                two_line_data = "".join((data+nl_data).split())
                #print(two_line_data)
                #print(two_line_data.lstrip().split(" ")[0])
                #print(unique_speaker_list)
                if two_line_data in unique_speaker_list:
                    #print("Yes")
                    print(line_no,data)
                    print(line_no,nl_data)
                    speaker_in_two_lines_list.append(line_no)

                # Joined text up to the first "(" is a speaker, while this
                # line's own text up to "(" is not.
                elif two_line_data.lstrip().split("(")[0] in unique_speaker_list and data.lstrip().split("(")[0].strip() not in unique_speaker_list :

                    print(line_no,data)
                    print(line_no,nl_data)
                    speaker_in_two_lines_list.append(line_no)

    speaker_lines_list.sort()
    speaker_in_two_lines_list.sort()
    print(speaker_lines_list)
    print(speaker_in_two_lines_list)


    # In[174]:


    # create new df with line no as index
    df_line_index =df.copy().set_index('line_no')
    # Result is discarded (notebook leftover).
    df_line_index.head()


    ## correct the speaker in two lines

    ## first go through speaker in two lines
    for sp_line in speaker_in_two_lines_list:
        if df_line_index['Identification_Status'][sp_line] == 'ps8':
            continue
        data = df_line_index['data'][sp_line]
        line_no = sp_line
        # Assumes the following line is numbered sp_line+1.
        nl_data = df_line_index['data'][sp_line+1]
        new_data = data.strip() + nl_data.strip()

        if re.search('\(',new_data):
            # Split the joined text at the first "(": speaker name stays on
            # this line, the parenthetical part moves to the next line.
            par_pos = re.search('\(',new_data).start()
            before_par = new_data[:par_pos]
            after_par = new_data[par_pos:]
            df_line_index['data'][sp_line] = before_par

            df_line_index['data'][sp_line+1] = after_par
            # Last character is ")": the whole next line is a parenthetical.
            if re.match('\)',after_par.strip()[-1]):
                df_line_index['Identification_Status'][sp_line+1] = 'ps10'
                df_line_index['parenthetical'][sp_line+1] = 'Complete'
                df_line_index['When_Identified'][sp_line+1] = 'ExaminingSpeakerLines'
            # NOTE(review): re.match anchors at the start of after_par, which
            # begins with "(" by construction, so this branch looks
            # unreachable; re.search may have been intended - confirm.
            elif re.match('\)',after_par.strip()):
                df_line_index['Identification_Status'][sp_line+1] = 'ps26'
                df_line_index['parenthetical'][sp_line+1] = 'PartStartMid'
                df_line_index['When_Identified'][sp_line+1] = 'ExaminingSpeakerLines'
            else:
                df_line_index['Identification_Status'][sp_line+1] = 'ps11'
                df_line_index['parenthetical'][sp_line+1] = 'StartingLeft'
                df_line_index['When_Identified'][sp_line+1] = 'ExaminingSpeakerLines'


            # add line no to speaker lines
            speaker_lines_list.append(sp_line)


            # print to report
    #         audit_report.write("%s.line no: %s , Found Speaker in two lines with continuing parenthetical, Separated speaker \n" %(audit_sno,sp_line))
    #         audit_sno += 1

        else:
            # print to report
            # NOTE(review): audit_sno is not defined anywhere in this
            # function; unless a module-level audit_sno exists this print
            # raises NameError - confirm.
            print("%s.line no: %s , Found Speaker in two lines merged the line \n" %(audit_sno,sp_line))
    #         audit_report.write("%s.line no: %s , Found Speaker in two lines merged the line \n" %(audit_sno,sp_line))
    #         audit_sno += 1

            # correct the line
            df_line_index['data'][sp_line] = new_data
            # add line no to speaker lines
            speaker_lines_list.append(sp_line)

            # delete the next line
            df_line_index.drop((sp_line+1),inplace= True)
            audit_df['line_removed'][sp_line+1] = 'Yes'
            print (data,nl_data,new_data)


    # In[176]:


    ## go through the speaker lines and rectify them , print to audit report
    for sp_line in speaker_lines_list:
        line_no = sp_line
        ##identify the above identified speaker lines as ps7
        # Lines that are already 'ps7'/'ps8' are left untouched.
        if df_line_index['Identification_Status'][sp_line] in ('ps7','ps8'):
            continue

        df_line_index['Identification_Status'][sp_line] = 'ps7'
        df_line_index['parenthetical'][sp_line] = 'Absent'
        if df_line_index['When_Identified'][sp_line] != 'FirstStrictConditions' :
            df_line_index['When_Identified'][sp_line] = 'ExaminingSpeakerLines'

        sp_data = df_line_index['data'][sp_line].strip()
        print(sp_line)
        try:
            print(sp_data)
        except:
            pass
        # Keep the line's current indent ('ssc'); fall back to 35 when it
        # cannot be converted to int.
        cur_indent = df_line_index['ssc'][sp_line]
        try:
            new_speaker_indent = int(cur_indent)
        except:
            new_speaker_indent = 35


        ## check and correct case
        if df_line_index['case'][sp_line] != 'AllUpper':
            try:
                print(sp_data)
            except:
                pass
            # rjust pads with new_speaker_indent leading spaces, then the
            # whole string is upper-cased.
            df_line_index['data'][sp_line] = sp_data.rjust(len(sp_data)+new_speaker_indent).upper()
            df_line_index['case'][sp_line] = 'AllUpper'
            try:
                print("case corrected to:",df_line_index['data'][sp_line])
            except:
                pass
            audit_df['case_corrected'][line_no] = 'Speaker Case corrected to All Upper Case'


        ## check and correct gap between speaker name
        # sp_data is the stripped text read before any case correction above.
        sp_data_nogap = "".join(sp_data.split())
        if sp_data != sp_data_nogap:
            try:
                print(sp_data)
            except:
                pass
            ## removing gap only if the first word is a single character
            first_word = sp_data.split()[0]
            if len(first_word) == 1:
                df_line_index['data'][sp_line] = (sp_data_nogap.upper()).rjust(len(sp_data_nogap)+new_speaker_indent)
                try:
                    print("speaker name corrected to:",df_line_index['data'][sp_line])
                except:
                    pass
                audit_df['space_removed_between_characters'][line_no] = 'Yes'


    # line_no goes back to being a regular column.
    df_line_index.reset_index(inplace=True)


    # From here on `df` is the corrected copy, re-indexed 0..n-1.
    df = df_line_index.sort_index().reset_index(drop=True)


    ## mixed speaker identification
    ## use the speakers to identify mixed speaker lines ps8, 25,30,21,28


    # In[184]:

    df['ps_not_to_remove'] = df['ps_not_to_remove'].astype(str)
    try:
        print(unique_speaker_list)
    except:
        pass
    print("performing checks for speaker followed by parenthetical")
    # NOTE(review): this loop iterates over the index captured here, but the
    # body can insert a row and rebuild `df` (see the first branch below).
    # After such an insert the remaining `index` values address rows shifted
    # by one, and the last row of the enlarged frame is never visited.
    for index in df.index:
        if df['Identification_Status'][index] == 'ps8':
            continue
        line_no = df['line_no'][index]
        print("line_no",line_no)
        print(df.dtypes)
        new_line_no = 0.0
        data = df['data'][index]
        cur_pos_list = df['Identification_Status'][index].split(";")
        top_pos = cur_pos_list[0]
        # Same list object as cur_pos_list: the in-place removals below also
        # change cur_pos_list.
        new_pos_list = cur_pos_list
        try:
            pos_not_to_remove = df['ps_not_to_remove'][index].split(";")
        except:
            pos_not_to_remove = []
        # Only protect possibilities the line currently has. The resulting
        # order comes from a set and is therefore arbitrary.
        set_1 = set(cur_pos_list)
        set_2 = set(pos_not_to_remove)
        pos_not_to_remove = list(set.intersection(set_1,set_2))

        for speaker in unique_speaker_list:
            check_done =False
            # NOTE(review): the speaker text is used directly as a regex
            # pattern (no re.escape), so names containing ".", "(", "?" etc.
            # are not matched literally. This gate is case-sensitive, while
            # the position lookups below ignore case and may therefore find
            # an earlier occurrence.
            if re.search(speaker,data) and df['Identification_Status'][index] not in ('ps7','ps8')  :

                # check if speaker is at start of line followed by something (like parenthetical)
                pos_starts = re.search(speaker,data,re.IGNORECASE).start()
                pos_end = re.search(speaker,data,re.IGNORECASE).end()
                before_speaker = data[:pos_starts]
                after_speaker = data[pos_end:]
                print("match found")
                try:

                    print("data 2347:",data)
                    print("speaker 2348:", speaker)
                    print("before speaker 2349 :", before_speaker)
                    print("after speaker 2350:",after_speaker)
                except:
                    pass
                # First non-blank character after the speaker name ('' if none).
                try:
                    char1_after_speaker = after_speaker.lstrip()[0]
                except:
                    char1_after_speaker = ''
                cur_indent = df['ssc'][index]
                try:
                    new_speaker_indent = int(cur_indent)
                except:
                    new_speaker_indent = 35


                try:
                    print(before_speaker)
                    print(after_speaker)
                    print("char1_after_speaker 2367:",char1_after_speaker)
                except:
                    pass
                ## separate parenthetical if speaker is followed by parenthetical
                # Note: before_speaker must be non-empty whitespace;
                # ''.isspace() is False, so an unindented speaker never
                # satisfies the isspace() tests in this chain.
                if before_speaker.isspace() and char1_after_speaker == '(' and df['parenthetical'][index] == 'PartMidEnd' and "V.O." not in str(after_speaker): 
                    #print("before_speaker 2372:", before_par)
                    print ("Seperating parenthetical")
                    print("Identifying speaker")
                    print(index)
                    # This row keeps only the (indented) speaker name ...
                    df['data'][index] = before_speaker + speaker
                    df['parenthetical'][index] = 'Absent'
                    df['When_Identified'][index] = 'ExaminingSpeakerLines'
                    df['case'][index] = 'AllUpper'
                    df['Identification_Status'][index] = 'ps7'
                    nlb = df['nlb'][index]
                    df['nlb'][index] = 'N'

                    audit_df['line_broken_into_multiple_lines'][line_no] = 'Separated Speaker and Parenthetical'

                    #print(df['Identification_Status'][index])
                    #new_line_no = str(int(line_no) + 0.5)
                    # ... and the parenthetical becomes a new row numbered
                    # line_no + 0.5, added under the fractional label
                    # index + 0.25 so that sort_index() places it right after
                    # the speaker row.
                    new_line_no = line_no + 0.5
                    print(type(line_no),type(new_line_no))
                    print("identifying parenthetical")
                    print(df.dtypes)
                    df.loc[index + 0.25] = np.nan
                    df.loc[index + 0.25,'data'] = str(after_speaker)
                    df.loc[index + 0.25,'parenthetical'] = 'Complete'
                    df.loc[index + 0.25,'When_Identified'] ='ExaminingSpeakerLines'
                    df.loc[index + 0.25,'Identification_Status'] = 'ps10'
                    df.loc[index + 0.25,'case'] = ''
                    df.loc[index + 0.25,'plb'] = 'N'
                    df.loc[index + 0.25,'nlb'] = nlb
                    df.loc[index + 0.25,'line_no'] = new_line_no
                    #print(df.loc[index + 0.25]['line_no'] ,new_line_no)
                    #df['line_no'] = pd.to_numeric(df['line_no'],errors ='coerce')

                    df = df.sort_index().reset_index(drop=True)
                    
                    audit_df.loc[new_line_no] = np.nan
                    audit_df['line_removed'][new_line_no] = 'No'
                    print(df.dtypes)
                    print(audit_df.dtypes)
                    # Moves on to the next speaker; this row is now 'ps7', so
                    # the gate above rejects the remaining speakers for it.
                    continue


                elif  before_speaker.isspace() and char1_after_speaker == '(' and df['parenthetical'][index] != 'PartMidMid':
                    ##make the line possibilities as ps8;ps25
                    ## next non-blank character after speaker is left (
                    print ("parenthetical mix")
                    new_pos_list = ['ps8','ps25']
                    ## check and change the indent to speaker indent of 35
                    # NOTE(review): the value written is new_speaker_indent,
                    # i.e. int(current ssc) (35 only when that conversion
                    # fails), not a fixed 35 as the comment above says -
                    # confirm which is intended.
                    if df['ssc'][index] != 35:
                        df['ssc'][index] = new_speaker_indent
                        df['data'][index] = data.strip().rjust(len(data.strip()) + new_speaker_indent)

#                         with open(audit_report_path,'a') as audit_report:
#                             audit_report.write("%s. line no: %s , Corrected Speaker Mix indent to  \n" %(audit_sno,index))
#                             audit_sno += 1

                    #df_line_index['Identification_Status'][index] = 'ps8;ps25'
    #                 break
                elif before_speaker.isspace() and (not after_speaker.isspace()) :
                    ## add code to remove
                    # cannot be ps1,ps2,ps3,ps7,ps9,ps10,ps11,ps12,ps16,ps17,ps18,ps19,ps21,ps22,ps26,ps27,ps28,ps29,ps31,
                    ps_remove = ['ps1','ps2','ps3','ps7','ps9','ps10','ps11','ps12','ps16','ps17','ps18','ps19','ps21','ps22','ps26','ps27','ps28','ps29','ps31']
                    # Removes in place from whichever list new_pos_list
                    # currently refers to; absent entries are skipped.
                    for ps in ps_remove:
                        try:
                            new_pos_list.remove(ps)
                        except:
                            continue

                    #df_line_index['Identification_Status'][index] = 'ps30;ps4;ps5;ps6;ps8;ps13;ps14;ps15'
                    print ("present but not parenthetical removed except - ps30;ps4;ps5;ps6;ps8;ps13;ps14;ps15")
    #                 break
                elif (not before_speaker.isspace()) and after_speaker.isspace():
                    new_pos_list = ['ps21','ps28','ps5','ps4']
                    #df_line_index['Identification_Status'][index] = 'ps21;ps28'
                    print ("before speaker present")
                    # Stop checking further speakers for this line.
                    check_done = True
                else:
                    new_pos_list = cur_pos_list
                    print("no change done")

                ## append the possibilities which were not to be removed back to the list if they got eliminated above

                # Protected possibilities are re-added at the front.
                for ps in pos_not_to_remove:
                    if ps not in new_pos_list:
                        new_pos_list.insert(0,ps)

                df['Identification_Status'][index] = ";".join([str(elem) for elem in new_pos_list])
                print('\n')

                if check_done:
                    break

                
    # A line counts as identified when its status is 'blank' or holds a
    # single possibility.
    for index in df.index:
        if df['Identification_Status'][index] == 'blank' or (len(df['Identification_Status'][index].split(";")) == 1):
            df['isIdentified'][index] = 'Yes'
        else:
            df['isIdentified'][index] = 'No'

    return df

def examine_speaker_next_lines(df,audit_df):
    """Classify the lines that follow each already-identified speaker line.

    Every row whose ``Identification_Status`` is exactly ``'ps7'`` or ``'ps8'``
    is treated as a speaker line. For each of them the following rows are
    processed in three passes:

    1. Rows directly after the speaker are tagged as parenthetical
       (``ps10`` complete, ``ps11`` beginning, ``ps20`` middle, ``ps12`` end)
       according to the ``parenthetical`` column, until a row that could be
       dialogue is reached.
    2. Starting at that row (which must list ``ps15`` among its candidates),
       consecutive rows with a matching ``ssc`` indent or a parenthetical
       marker are collected as one dialogue run. A blank line or an
       ``AllUpper`` row ends the run.
    3. The run is examined from its last row: trailing parenthetical text is
       split off into a new row or relabelled, the last dialogue row becomes
       ``ps15``, the first one ``ps13`` and the rows in between ``ps14`` (or
       ``ps11``/``ps12``/``ps20``/``ps26``/``ps27`` when parentheticals are
       involved).

    Finally ``isIdentified`` is recomputed for every row: ``'Yes'`` for blank
    rows and rows with a single candidate, ``'No'`` otherwise.

    Args:
        df: line-level DataFrame. Columns used here: ``line_no``, ``data``,
            ``nlb``, ``plb``, ``ssc``, ``case``, ``parenthetical``,
            ``Identification_Status``, ``When_Identified``, ``isIdentified``.
            Its index is reset in place.
        audit_df: audit DataFrame, addressed with ``audit_df.loc[line_no]``
            (assumed to be indexed by line number -- TODO confirm). A row is
            added for every line that is split off here.

    Returns:
        The updated DataFrame. When a line was split this is a new object
        (result of ``sort_index().reset_index()``), so callers must use the
        return value rather than rely on in-place mutation.
    """
    df.reset_index(inplace=True, drop=True)

    ## identifying lines after speaker
    ## get the speaker lines

    speaker_lines_list = df.loc[(df['Identification_Status'] == 'ps7') | (df['Identification_Status'] == 'ps8'),'line_no'].to_list()

    ## go through the speaker lines and identify the lines after speaker
    for line in speaker_lines_list:
        # NOTE(review): blank_to_delete is filled below but never read in this function.
        blank_to_delete = []
        # Look the row up by line_no on every iteration: earlier iterations may
        # have inserted rows and renumbered the positional index.
        index = df.loc[df['line_no'] == line,:].index.values[0]
        data = df['data'][index]
        speaker_name = data.strip()
        print("\n")
        print("speaker index",index)
        print("speaker line no",line)
        # printing is wrapped in try/except throughout; presumably to survive
        # console encoding errors -- TODO confirm
        try:
            print("data:\n",data)
        except:
            pass
        # move the index to the next non-blank line ('nlb' == 'Y' means the next line is blank)

        if df['nlb'][index] == 'Y':
            index += 2
        else:
            index += 1

        possible_dialog_line = False
        parenthetical_begun = False
        ## examine the lines (if any) after speaker and before dialogue
        ## move index till the end of the parenthetical comes
        # NOTE(review): index is advanced without a bounds check; a parenthetical
        # that runs to the end of df would raise on the lookup below -- confirm.
        while not possible_dialog_line:
            data = df['data'][index]

            line_no = df['line_no'][index]
            cur_line_par = df['parenthetical'][index]
            print("examining line")
            try:
                print(data)
                print(line_no,cur_line_par)
            except:
                pass
            if df['Identification_Status'][index] == 'ps10':
                # already tagged as a complete parenthetical: step over it
                index += 1
                #possible_dialog_line = True

            elif cur_line_par == 'Complete':
                # complete line in parenthetical
                print("Identifying as Parenthetical Complete")
                df['Identification_Status'][index] = 'ps10'
                df['When_Identified'][index] = 'ExaminingLinesAfterSpeaker'
                index +=1
                possible_dialog_line = True
            elif cur_line_par == 'StartingLeft' and not(parenthetical_begun) :
                # line has parenthetical in beginning only
                print("identifying as parenthetical Beginning")
                df['Identification_Status'][index] = 'ps11'
                df['When_Identified'][index] = 'ExaminingLinesAfterSpeaker'
                parenthetical_begun = True
                index +=1
            elif cur_line_par == 'EndingRight' and parenthetical_begun:
                # line has parenthetical at end only
                print("Identifying as parenthetical end")
                df['Identification_Status'][index] = 'ps12'
                df['When_Identified'][index] = 'ExaminingLinesAfterSpeaker'
                index +=1
                possible_dialog_line = True

            elif cur_line_par == 'Absent' and parenthetical_begun:
                # multiple lines could be in parenthetical
                print("Identifying as parenthetical middle")
                df['Identification_Status'][index] = 'ps20'
                df['When_Identified'][index] = 'ExaminingLinesAfterSpeaker'
                index +=1

            elif df['Identification_Status'][index] == 'ps13' or df['Identification_Status'][index] == 'ps14':
                ## dialogue end can still be there
                index += 1

    #         elif cur_line_par == 'PartBeginningMid':
    #             # starting part of line in paranthetical
    #             print("Identifying as parenthetical mix with succeeding dialogue")
    #             df['Identification_Status'][index] = 'ps26'
    #             df['When_Identified'][index] = 'ExaminingLinesAfterSpeaker'
    #             index +=1
    #             possible_dialog_line = True

    #         elif cur_line_par == 'PartMidEnd':
    #             # end part line in parenthetical
    #             print("Identifying as parenthetical mix with preceeding dialogue")
    #             df['Identification_Status'][index] = 'ps27'
    #             df['When_Identified'][index] = 'ExaminingLinesAfterSpeaker'
    #             ## breaking the line to dialogue and parenthtical complete-

    #             index +=1
    #             possible_dialog_line = True

            elif cur_line_par == 'Absent':
                print("line should be dialogue")
                possible_dialog_line = True
            else:
                print("line could be dialogue")
                possible_dialog_line = True

            print(line_no,possible_dialog_line,parenthetical_begun)


        ## skip if blank (only a single blank row is stepped over)
        if df['Identification_Status'][index] == 'blank':
            print("skipping blank line")
            blank_to_delete.append(index)
            index +=1


        # check if the current line has possibility of being a dialogue , if not move to next speaker line
        if 'ps15' not in df['Identification_Status'][index].split(";") :
            print("line does not have possibility of dialogue, so cannot process")
            continue
        else:
            print("dialogue line(s) after speaker")


        cur_indent = df['ssc'][index]
        cur_line_par = df['parenthetical'][index]

        next_line_blank = True if df['nlb'][index] == 'Y' else False

        # NOTE(review): this `break` leaves the speaker loop entirely, so any
        # remaining speaker lines are not examined once a dialogue starts within
        # two rows of the end of df.
        if index+2 > df.index[-1]:
            break

        if next_line_blank:
            next_nbl_indent = df['ssc'][index+2]
            next_nbl_par = df['parenthetical'][index+2]
            next_nbl_case = df['case'][index+2]
            next_nbl_data = df['data'][index+2]
            j = index + 2

        else:
            next_nbl_indent = df['ssc'][index+1]
            next_nbl_par = df['parenthetical'][index+1]
            next_nbl_case = df['case'][index+1]
            next_nbl_data = df['data'][index+1]
            j = index + 1

        # start_index: first row of the dialogue run; rev_index: last row found so far
        start_index = index
        rev_index = index
        lines_count = 1
        data = df['data'][index]
        print("all lines after speaker with same indent with parentheticals")
        try:
            print(data)
        except:
            pass

        dia_indent = cur_indent
        print(dia_indent)
        ## adding condition - next line can be parenthetical
        ## removing conditions - next to next line check not required
        ## get the number of line with same indent
        # Because of the `not next_line_blank` term the run never extends across
        # a blank line; the loop body is only entered for directly adjacent rows.
        while (cur_indent == next_nbl_indent or dia_indent == next_nbl_indent or next_nbl_par in ('Complete','StartingLeft','EndingRight')) and not (next_nbl_case == 'AllUpper') and (not next_line_blank): #  or next_nbl_case in ('AllLower','MidUpper','Partial')): ## considering dialogue ends if next blank except next line blank and nn line case Alllower
                                                     #and len(next_nbl_data.split()) == 1):

            lines_count +=1
            cur_indent = next_nbl_indent
            cur_line_par = next_nbl_par
            next_line_blank = True if df['nlb'][j] == 'Y' else False
            data = df['data'][j]
            try:
                print(data)
                print(cur_indent)
            except:
                pass
            rev_index = j

            # stop before peeking past the end of the frame
            if j+1 >= df.index[-1]:
                break

            if next_line_blank:
                break ## as not considering space between dialogue .. action becoming dialogue
                #next_nbl_indent = df['ssc'][j+2]
                #next_nbl_par = df['parenthetical'][j+2]
                #next_nbl_case = df['case'][j+2]
                #next_nbl_data = df['data'][j+2]
                #j += 2

            else:
                next_nbl_indent = df['ssc'][j+1]
                next_nbl_par = df['parenthetical'][j+1]
                next_nbl_case = df['case'][j+1]
                next_nbl_data = df['data'][j+1]
                j += 1

            print("\n Next line indent is",next_nbl_indent)


        ## now let's start examining these lines in reverse order
        ## if last line has parenthetical end or complete then it is action mixed, not dialogue

        last_line_par = cur_line_par
        data = df['data'][rev_index]
        dialogue_end_identified = False
        dia_end = rev_index
        ###


        # last line is mixed with parenthetical. if it is parenthetical Mid end ->  separate to new action line
        if last_line_par == 'PartMidEnd':
            print("Dialogue mixed with parenthetical")
            # separate line into the text before and from the first '(' onwards
            par_start = re.search('\(',data).start()
            before_par = data[:par_start]
            after_par = data[par_start:]
            # make current line as before par and tag as dialogue
            print(" identifying before parenthentical line as ps15")
            try:
                print(before_par)
            except:
                pass
            df['data'][rev_index] = before_par
            # remember the original blank-line flag; it moves to the new row below
            next_line_flag = df['nlb'][rev_index]
            df['nlb'][rev_index] = 'N'
            df['Identification_Status'][rev_index] = 'ps15'
            df['parenthetical'][rev_index] = 'Absent'
            df['When_Identified'][rev_index] = 'ExaminingLinesAfterSpeaker'
            df['isIdentified'][rev_index] = 'Yes'
            dialogue_end_identified = True


            #make new next line as action line ps6
            print("action after dialogue, separating to newline , identifying line as ps6")
            try:
                print(after_par)
                print("after_par is here")
            except:
                pass
            print("df['line_no'][rev_index]:",df['line_no'][rev_index])
            print(df['line_no'][rev_index])
            # The new row gets a fractional line number halfway between this
            # line and the next one; if that number is already present in
            # audit_df, halve the distance to the next line once more.
            line_no = df['line_no'][rev_index]
            next_line_no = df['line_no'][rev_index+1]
            new_line_no = (float(line_no) + float(next_line_no)) / 2
            
            if new_line_no in audit_df.index:
                new_line_no = (float(new_line_no) + float(next_line_no))/2
            # NOTE(review): the self-assignment below has no effect
            new_line_no = new_line_no

            audit_df.loc[new_line_no] = np.nan
            # NOTE(review): chained indexing (.loc[row][col] = ...) may write to a
            # temporary copy instead of audit_df -- confirm these two writes land.
            audit_df.loc[new_line_no]['line_removed'] = 'No'

            audit_df.loc[line_no]['line_broken_into_multiple_lines'] = 'Separated Action from Dialogue and added Speaker'

            # Insert the split-off text as a new row at a fractional index label
            # so that sort_index() below places it right after rev_index.
            df.loc[rev_index + 0.25] = np.nan
            df.loc[rev_index + 0.25,'ssc'] = 0
            # action text = capitalised speaker name + parenthetical text without brackets
            new_data = speaker_name.capitalize() + ' ' + after_par.replace('(','').replace(')','')
            df.loc[rev_index + 0.25,'data'] = new_data
            df.loc[rev_index + 0.25,'case'] = ''
            df.loc[rev_index + 0.25,'plb'] = 'N'
            df.loc[rev_index + 0.25,'nlb'] = next_line_flag
            df.loc[rev_index + 0.25,'Identification_Status'] = 'ps6'
            df.loc[rev_index + 0.25,'When_Identified'] = 'ExaminingLinesAfterSpeaker'
            df.loc[rev_index + 0.25,'isIdentified'] = 'Yes'
            df.loc[rev_index + 0.25,'parenthetical'] = 'Absent'
            df.loc[rev_index + 0.25,'line_no'] = new_line_no
            try:
                print(new_data)
            except:
                pass
            # insert audit report


            # rebinds df to a new, renumbered frame (rows after the insert shift by one)
            df = df.sort_index().reset_index(drop=True)

        elif last_line_par == 'EndingRight':
            print("last line has parenthtical end")
            # if the parenthetical ends here, find its beginning, split after the beginning and identify as action
            # j is reused here as a backwards offset from rev_index
            j=1
            beginning_not_found = True
            # NOTE(review): the search has no lower bound; if no StartingLeft /
            # MixedLeft row precedes rev_index the lookup runs off the start of
            # df -- confirm this cannot happen.
            while beginning_not_found:
                print("looking for beginning parenthtical")
                data = df['data'][rev_index-j]
                try:
                    print(data)
                except:
                    pass
                if df['parenthetical'][rev_index-j] in ('StartingLeft','MixedLeft') :
                    beginning_not_found = False
                else:
                    j+=1
            # always true once the loop above has exited
            if beginning_not_found == False :
                print("parenthetical beginning found")
                if  df['parenthetical'][rev_index-j] == 'MixedLeft' and (rev_index-j)>=start_index:
                    data = df['data'][rev_index-j]
                    # separate line into the text before and from the first '(' onwards
                    par_start = re.search('\(',data).start()
                    before_par = data[:par_start]
                    after_par = data[par_start:]
                    # make current line as before par and tag as dialogue
                    print(" splitting and identifying before parenthentical line as ps15")
                    try:
                        print(before_par)
                    except:
                        pass
                    print("here")
                    df['data'][rev_index-j] = before_par
                    next_line_flag = df['nlb'][rev_index-j]
                    df['nlb'][rev_index-j] = 'N'
                    df['Identification_Status'][rev_index-j] = 'ps15'
                    df['parenthetical'][rev_index-j] = 'Absent'
                    df['When_Identified'][rev_index-j] = 'ExaminingLinesAfterSpeaker'
                    df['isIdentified'][rev_index-j] = 'Yes'
                    dialogue_end_identified = True
                    dia_end = rev_index-j

                    #make new next line as action line
                    print("action after dialogue, separating to newline ")
                    try:
                        print(after_par)
                        print("after par")
                    except:
                        pass
                    
                    line_no = df['line_no'][rev_index-j]
                    print("2799",type(line_no))
                    # NOTE(review): the print in this try reads new_line_no before
                    # this branch assigns it. If it is still unbound (no earlier
                    # split in this call) the NameError is swallowed and the
                    # except path repeats the lookup with an int() index.
                    try:
                        next_line_no = df['line_no'][rev_index-j+1]
                        print("2802",type(new_line_no))
                    except:
                        next_line_no = df['line_no'][int(rev_index-j+1)]
                        print("2805",rev_index-j+1)
                    # midpoint line number; falls back to float() when the values do not add directly
                    try:
                        new_line_no = (line_no + next_line_no) / 2
                    except:
                        new_line_no = (float(line_no) + float(next_line_no)) / 2
                        
                    try:
                        if new_line_no in audit_df.index:
                            new_line_no = (new_line_no + next_line_no)/2
                    except:
                        if new_line_no in audit_df.index:
                            new_line_no = (float(new_line_no) + float(next_line_no))/2

                    audit_df.loc[new_line_no] = np.nan
                    # NOTE(review): chained indexing may write to a temporary copy -- confirm
                    audit_df.loc[new_line_no]['line_removed'] = 'No'

                    audit_df.loc[line_no]['line_broken_into_multiple_lines'] = 'Separated Action from Dialogue and added Speaker'

                    # add new action line , audit report, and change flag
                    # (Identification_Status of the new row is left as NaN here)
                    df.loc[rev_index-j + 0.25] = np.nan
                    df.loc[rev_index-j + 0.25,'ssc'] = 0
                    df.loc[rev_index-j + 0.25,'data'] = after_par
                    df.loc[rev_index-j + 0.25,'case'] = ''
                    df.loc[rev_index-j + 0.25,'plb'] = 'N'
                    df.loc[rev_index-j + 0.25,'nlb'] = next_line_flag
                    #df.loc[index + 0.25,'Identification_Status'] = 'ps6'
                    df.loc[rev_index-j + 0.25,'When_Identified'] = 'ExaminingLinesAfterSpeaker'
                    df.loc[rev_index-j + 0.25,'isIdentified'] = 'Yes'
                    df.loc[rev_index-j + 0.25,'parenthetical'] = 'StartingLeft'
                    df.loc[rev_index-j + 0.25,'line_no'] = new_line_no

                    df = df.sort_index().reset_index(drop=True)

                    # The inserted row pushed the old last line down by one. With j
                    # unchanged, rev_index-j now addresses the inserted row (marked
                    # 'StartingLeft'), which the check below picks up.
                    rev_index += 1
                    # insert audit report

            if  df['parenthetical'][rev_index-j] == 'StartingLeft' and (rev_index-j)>=start_index:
                # The parenthetical block at the end of the run is relabelled:
                # first row -> ps4 (speaker name prefixed, '(' removed),
                # rows in between -> ps5, last row -> ps6 (')' removed).
                if j >=1:
                    df['Identification_Status'][rev_index-j] = 'ps4'
                    df['When_Identified'][rev_index-j] = 'ExaminingLinesAfterSpeaker'
                    df['isIdentified'][rev_index-j] = 'Yes'
                    cur_data = df['data'][rev_index-j]
                    new_data = speaker_name.capitalize() + ' ' + cur_data.replace('(','').strip()
                    try:
                        print(new_data)
                    except:
                        pass
                    df['data'][rev_index-j] =  new_data
                    df['parenthetical'][rev_index-j] = 'Absent'
                    j -= 1

                    while j != 0:
                        df['Identification_Status'][rev_index-j] = 'ps5'
                        df['When_Identified'][rev_index-j] = 'ExaminingLinesAfterSpeaker'
                        df['isIdentified'][rev_index-j] = 'Yes'
                        try:
                            print(df['data'][rev_index-j])
                        except:
                            pass
                        j -= 1
                df['Identification_Status'][rev_index] = 'ps6'
                df['When_Identified'][rev_index] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][rev_index] = 'Yes'
                cur_data = df['data'][rev_index]
                new_data = cur_data.replace(')','').strip()
                df['data'][rev_index] =  new_data
                df['parenthetical'][rev_index] = 'Absent'
                try:
                    print(new_data)
                except:
                    pass
                # insert audit report

        elif last_line_par == 'Absent' and df['case'][rev_index] != 'AllUpper':
            print("Identifying as dialogue end")
            df['Identification_Status'][rev_index] = 'ps15'
            df['When_Identified'][rev_index] = 'ExaminingLinesAfterSpeaker'
            df['isIdentified'][rev_index] = 'Yes'
            dialogue_end_identified = True


        ### Now the last line or lines till parenthetical start have been examined
        ## if dialogue end is not identified then the run is left untouched


        if not dialogue_end_identified:
            print("Could not identify the dialogue")
            continue
        else:
            print("dialogue end identfied as")
            data = df['data'][dia_end]
            try:
                print(data)
            except:
                pass
        ## now for the remaining lines identify the first as dialogue beginning and the others as middle
        j = start_index
        cur_line_par = df['parenthetical'][j]
        data = df['data'][j]
        parenthetical_begun = False

        # a single-line dialogue (start_index == dia_end) needs no further tagging
        if j < dia_end :
            print("\n")
            try:
                print(data)
            except:
                pass

            if cur_line_par == 'Absent':

                print("Identifying as dialogue begining")

                df['Identification_Status'][j] = 'ps13'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'

            # NOTE(review): ('PartMidEnd') is a plain string, so `in` is a substring
            # test, not tuple membership; the same applies to the other
            # single-element `in (...)` checks below.
            elif cur_line_par in ('PartMidEnd'):
                print("Identifying as dialogue mixed with parenthetical")

                df['Identification_Status'][j] = 'ps27'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'

            elif cur_line_par == 'MixedLeft':
                print("Identifying as dialogue mixed with parenthtical")

                df['Identification_Status'][j] = 'ps27'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'
                parenthetical_begun = True

            elif cur_line_par == 'StartingLeft':
                print("Identifying as parenthetical beginning")

                df['Identification_Status'][j] = 'ps11'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'
                parenthetical_begun = True


            elif cur_line_par in ('PartStartMid'):
                print("Identifying as parenthetical mixed with dialog")

                df['Identification_Status'][j] = 'ps26'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'

            elif cur_line_par in ('PartMidMid') :
                print("Identifying as dialogue mixed with parenthtical ")

                # two candidates are kept, so the final pass marks this row as not identified
                df['Identification_Status'][j] = 'ps26;ps27'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'

            next_line_blank = True if df['nlb'][j] == 'Y' else False

            if next_line_blank :
                j += 2
            else:
                j += 1


        else:
            continue

        ## now end and beginning have been examined . rest are middle if parenthetical absent
        cur_line_par = df['parenthetical'][j]
        data = df['data'][j]


        while j < dia_end :
            print("\n")
            try:
                print(data)
            except:
                pass


            if cur_line_par == 'Absent':

                print("Identifying as dialogue middle")

                df['Identification_Status'][j] = 'ps14'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'

            elif cur_line_par in ('PartMidEnd'):
                print("Identifying as dialogue mixed with parenthetical")

                df['Identification_Status'][j] = 'ps27'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'

            elif cur_line_par == 'MixedLeft':
                print("Identifying dialogue mixed with parenthtical")

                df['Identification_Status'][j] = 'ps27'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'
                parenthetical_begun = True

            elif cur_line_par == 'StartingLeft':
                print("Identifying as parenthetical beginning")

                df['Identification_Status'][j] = 'ps11'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'
                parenthetical_begun = True

            # NOTE(review): a row whose value is exactly 'Absent' is already taken
            # by the first branch of this chain, so this branch cannot tag it as
            # parenthetical middle; it only fires for proper substrings of
            # 'Absent' (including an empty string).
            elif cur_line_par in ('Absent') and parenthetical_begun:
                print("Identifying as parenthetical middle")

                df['Identification_Status'][j] = 'ps20'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'
                parenthetical_begun = True

            elif cur_line_par in ('EndingRight') and parenthetical_begun:
                print("Identifying as parenthetical ending")

                df['Identification_Status'][j] = 'ps12'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'
                parenthetical_begun = False

            elif cur_line_par in ('MixedRight') and parenthetical_begun:
                print("Identifying as dialogue mixed with parenthetical ")

                df['Identification_Status'][j] = 'ps26'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'
                parenthetical_begun = False

            elif cur_line_par in ('PartStartMid'):
                print("Identifying as parenthetical mixed with dialog")

                df['Identification_Status'][j] = 'ps26'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'

            elif cur_line_par in ('PartMidMid') :
                print("Identifying as dialogue mixed with parenthtical ")

                df['Identification_Status'][j] = 'ps26;ps27'
                df['When_Identified'][j] = 'ExaminingLinesAfterSpeaker'
                df['isIdentified'][j] = 'Yes'


            next_line_blank = True if df['nlb'][j] == 'Y' else False

            if next_line_blank :
                j += 2
            else:
                j += 1

            # load the next row before the loop condition is re-evaluated
            data = df['data'][j]
            print("printing_data before loop")
            cur_line_par = df['parenthetical'][j]

    # Recompute the flag for the whole frame: a row counts as identified when it
    # is blank or has exactly one candidate left (this overrides the 'Yes'
    # values set above for rows that still carry several candidates).
    for index in df.index:
        if df['Identification_Status'][index] == 'blank' or (len(df['Identification_Status'][index].split(";")) == 1):
            df['isIdentified'][index] = 'Yes'
        else:
            df['isIdentified'][index] = 'No'

    return df

def prep_pnnbl_wts(csv_pnbl_nnbl,cur_dir):
    """Split the combined PNBL/NNBL weights sheet into two CSV files.

    The sheet is read with its first physical row skipped, so the second row
    supplies the header. Column 0 holds the possibility labels, columns 28-39
    the weights keyed by the previous non-blank line (PNBL) and columns 41-52
    the weights keyed by the next non-blank line (NNBL). Each group is written
    out with the label column and with its weight columns renamed to the
    ``ps*`` identifiers they stand for.

    Args:
        csv_pnbl_nnbl: path (or file-like object) of the combined weights CSV;
            it must have at least 53 columns.
        cur_dir: directory that receives ``pnbl_weights.csv`` and
            ``nnbl_weights.csv``.

    Returns:
        None. The two CSV files are the only output.
    """
    # Target column names, in sheet order, for each 13-column selection.
    pnbl_names = ['Possibilities', 'ps2', 'ps1', 'ps3', 'ps4', 'ps5', 'ps6',
                  'ps7', 'ps10', 'ps13', 'ps14', 'ps15', 'ps16']
    nnbl_names = ['Possibilities', 'ps3', 'ps2', 'ps1', 'ps16', 'ps13', 'ps14',
                  'ps15', 'ps10', 'ps7', 'ps4', 'ps5', 'ps6']

    pnbl_nnbl_df = pd.read_csv(csv_pnbl_nnbl, skiprows=[0])

    # Take explicit copies: renaming an iloc slice in place mutates a frame
    # that pandas still tracks as a slice of the parent (SettingWithCopyWarning).
    pnbl_df = pnbl_nnbl_df.iloc[:, [0] + list(range(28, 40))].copy()
    nnbl_df = pnbl_nnbl_df.iloc[:, [0] + list(range(41, 53))].copy()

    # read_csv de-duplicates header names, so assigning by position gives the
    # same result as mapping each old name to its new one.
    pnbl_df.columns = pnbl_names
    nnbl_df.columns = nnbl_names

    pnbl_df.to_csv(os.path.join(cur_dir, 'pnbl_weights.csv'), index=False)
    nnbl_df.to_csv(os.path.join(cur_dir, 'nnbl_weights.csv'), index=False)

    #return pnbl_df,nnbl_df


def identify_using_pnbl_nnbl(df,identify_using,iteration):

    def takeNumeric(ps):
        return int(ps[2:])

    def useWeights(ps):
        return int(ps.split("-")[1])

    count_lines_identified = 0
    all_pos = [ "ps{0}".format(ps) for ps in range(1,35) ]
    new_lines_identified = False
    ## column to store the current identification status
    ## for 1st iteration this is the stage1 output (output after the strict conditions)
    ##last_identification = 'stage-1_output'
    df['Identification_Status'] = df[identify_using]
    #pos_count_column_name_before = 'CountofPossibilities_before_Iteration' + str(iteration)
    pos_count_column_name = 'CountofPossibilities_afterIteration' + str(iteration)
    df[pos_count_column_name] = ''
    line_nos_identified = []
    total_pos_before = 0
    total_pos_after = 0
    pos_decreased = False


    for index in df.index:
        ## process the line only if it is unidentified (inclduing more than 1 possibilities)
        cur_line_pos = df['Identification_Status'][index].split(";")
        line_no = df['line_no'][index]

#         if (df['isIdentified'][index] == 'No'):
#             print(line_no,": line currently unidentified")
#             print(df['data'][index])
#             line_pos_using_pnbl = all_pos
#             line_pos_using_nnbl = all_pos
#             total_pos_before += len(all_pos)
#         el
        if len(cur_line_pos) > 1:
            print(line_no,": line currently has more than one possibilties")
            try:
                print(df['data'][index])
            except:
                pass

            print(cur_line_pos)
            line_pos_using_pnbl = cur_line_pos
            line_pos_using_nnbl = cur_line_pos
            total_pos_before += len(cur_line_pos)
        else:
            print(line_no, ": line already identified as",df['Identification_Status'][index])
            total_pos_before += 1
            total_pos_after += 1
            continue


        ## for unidentified line find the previous-line which is non-blank
        j=1
        pnbl = 'not found'
        print("looking for previous non-blank line")
        while (pnbl == 'not found') and ((index-j) >= 0 ):
            if (df['Identification_Status'][index-j] == 'blank'):
                print ("previous line is blank")
                print ("moving to subsequent previous line")
                j+=1
            else:
                pnbl = "found"
                print("found previous non-blank line")
                pnbl_line_pos = df['Identification_Status'][index-j].split(";")


        # check if the previous line found is identified or not (unidentified or having more than one possibilities)
        if (pnbl == 'found'):

            if (df['isIdentified'][index-j] == 'No') :
                print("but as previous non-blank line is unidentified so cannot perform pnbl check, so skipping")
            elif len(pnbl_line_pos) > 1:
                print("but as previous non-blank line is unidentified (has more than one possibilties) so cannot perform pnbl check, so skipping")
            else:
                print("AND previous non-blank line is already identified as",df['Identification_Status'][index-j] )
                try:
                    print(df['data'][index-j])
                except:
                    pass
                pnbl_identified_as = df['Identification_Status'][index-j]
                df['pnbl_identified_as'][index] = pnbl_identified_as
                try:
                    line_pos_using_pnbl = sorted(list(pnbl_df.loc[pnbl_df[pnbl_identified_as] > 0,pnbl_identified_as].index),key=takeNumeric)
                    line_pos_string = ';'.join([str(elem) for elem in line_pos_using_pnbl])
                    df['pos_using_pnbl'][index] = line_pos_string
                except:
                    print("pnbl weights sheet does not have column",pnbl_identified_as)
                    pass


        ## for unidentified line find the next-line which is non-blank
        j=1
        nnbl = 'not found'
        print("looking for next non-blank line")
        while (nnbl == 'not found') and ((index+j) < (len(df))):
            if (df['Identification_Status'][index+j] == 'blank'):
                print ("next line is blank")
                print ("moving to subsequent next line")
                j+=1
            else:
                nnbl = "found"
                print("found next non-blank line")
                nnbl_line_pos = df['Identification_Status'][index+j].split(";")

        # check if the line found is identified or not
        if (nnbl == 'found'):
            if (df['isIdentified'][index+j] == 'No'):
                print("but as next non-blank line is unidentified so cannot perform nnbl check, so skipping")
            elif len(nnbl_line_pos)>1:
                print("but as next non-blank line is unidentified(has multiple possibilties) so cannot perform nnbl check, so skipping")

            else:

                print("AND next non-blank line is already identified as",df['Identification_Status'][index+j] )
                try:
                    print(df['data'][index+j])
                except:
                    pass
                nnbl_identified_as = df['Identification_Status'][index+j]
                df['nnbl_identified_as'][index] = nnbl_identified_as
                try:
                    line_pos_using_nnbl = sorted(list(nnbl_df.loc[nnbl_df[nnbl_identified_as] > 0,nnbl_identified_as].index),key=takeNumeric)
                    line_pos_string = ';'.join([str(elem) for elem in line_pos_using_nnbl])
                    df['pos_using_nnbl'][index] = line_pos_string
                except:
                    print("nnbl weights sheet does not have column",nnbl_identified_as)
                    pass


        ## now get the combined possibility , doing this for unidentified lines
        if(df['isIdentified'][index] == 'No') or len(cur_line_pos)> 1:
            set_a = set(line_pos_using_pnbl)
            set_b = set(line_pos_using_nnbl)


            ## for some lines having mulitple possibilites some possibilities might get eliminated
            ## so take intersection with current possibilites
            if len(cur_line_pos)> 1:

                set_c = set(cur_line_pos)
                pos_using_pnbl_nnbl = sorted(set.intersection(set_a,set_b,set_c))


            else:
                pos_using_pnbl_nnbl = sorted(set.intersection(set_a,set_b))

            ## append the top and other posibility back to the beginning list if it got eliminated in the intersection
            ## append the  posibility which were not to be removed back to the list if it got eliminated in the intersection
            try:
                pos_not_to_remove = df['ps_not_to_remove'][index].split(";")
            except:
                pos_not_to_remove = []
            for ps in pos_not_to_remove:
                if ps not in pos_using_pnbl_nnbl:
                    pos_using_pnbl_nnbl.insert(0,ps)


            print("pos_using_pnbl_nnbl is ",pos_using_pnbl_nnbl)
            ## if only one poss then that means identified
            if len(pos_using_pnbl_nnbl) == 1:
                new_lines_identified = True
                identify_using = 'Identification_Status'
                count_lines_identified += 1
                line_nos_identified.append(df['line_no'][index])
                df['When_Identified'][index] = 'PNBL_NNBL'

            ## append the weight to the possibilites
            pos_with_weights = []
            for pos in pos_using_pnbl_nnbl:
                wt = ''
                pos_wt = str(pos)
                try:
                    wt = df[pos_wt][index]

                except:
                    print("could not find weight for pos ",pos," at index ",index)
                    continue

                try:
                    wt = int(wt)
                    pos_wt +=  '-' + str(wt)
                except:
                    print("could not convert wt to int for pos ",pos," at index ",index)
                    continue

                pos_with_weights.append(pos_wt)

            print("pos_with_weights is ",pos_with_weights)
            # now sort in descending order using the weights as key
            pos_with_weights = sorted(pos_with_weights,key=useWeights , reverse = True)
            print("sorted pos_with_weights is ",pos_with_weights)
            line_pos_string_with_weights = ';'.join([str(elem) for elem in pos_with_weights])

            df['Identification_Status_with_weights'][index] = line_pos_string_with_weights

            ## copy over to identification status without the weights but in order of decreasing weights
            pos_without_weight = []
            for pos in pos_with_weights:
                pos_without_weight.append(pos.split("-")[0])

            line_pos_string = ';'.join([str(elem) for elem in pos_without_weight])
            print("line new possibilites",line_pos_string)
            df['pos_using_pnbl_nnbl'][index] = line_pos_string

            df['Identification_Status'][index] = line_pos_string
            print(df['Identification_Status'][index])

            ##write the count of possibilities to a column, make new column for each iteration
            df[pos_count_column_name][index] = len(pos_without_weight)
            total_pos_after += len(pos_without_weight)


    print( "new lines identified :" ,new_lines_identified)
    print(total_pos_before,total_pos_after)

    if (total_pos_before - total_pos_after) > 0:
        pos_decreased = True
    else:
        pos_decreased = False

    return df,new_lines_identified,identify_using,count_lines_identified,line_nos_identified,pos_decreased


# Neighbour rules used by remove_ineligible_pos.  Each possibility maps to a
# pair: (labels accepted among the previous line's possibilities,
#        labels accepted among the next line's possibilities).
# A possibility that has no entry here is never eliminated by the rule check.
_PS_NEIGHBOUR_RULES = {
    'ps1': (('blank', 'ps6', 'ps15', 'ps16', 'ps17'),
            ('blank', 'ps4', 'ps6')),
    'ps2': (('blank', 'ps6', 'ps15', 'ps16', 'ps17'),
            ('ps3',)),
    'ps3': (('ps2',),
            ('blank', 'ps4', 'ps6')),
    'ps4': (('blank', 'ps1', 'ps3', 'ps15'),
            ('blank', 'ps5', 'ps6')),
    'ps5': (('ps4', 'ps5'),
            ('ps5', 'ps6')),
    'ps6': (('blank', 'ps1', 'ps3', 'ps4', 'ps5', 'ps15'),
            ('blank', 'ps1', 'ps2', 'ps4', 'ps6', 'ps7', 'ps8', 'ps16')),
    'ps7': (('blank', 'ps6'),
            ('blank', 'ps10', 'ps11', 'ps13', 'ps15')),
    'ps8': (('blank', 'ps6'),
            ('ps9', 'ps10', 'ps11', 'ps13', 'ps15')),
    'ps9': (('ps7',),
            ('ps10', 'ps11', 'ps13', 'ps15')),
    'ps10': (('ps7', 'ps8', 'ps9'),
             ('ps13', 'ps15')),
    'ps11': (('ps7', 'ps8', 'ps9', 'ps14', 'blank'),
             ('ps12', 'ps26')),
    'ps12': (('ps11',),
             ('ps13',)),
    'ps13': (('blank', 'ps7', 'ps8', 'ps9', 'ps10', 'ps12'),
             ('blank', 'ps14', 'ps15')),
    'ps14': (('ps13', 'ps14', 'blank'),
             ('ps14', 'ps15', 'blank')),
    'ps15': (('ps7', 'ps8', 'ps9', 'ps10', 'ps12', 'ps13', 'ps14', 'blank'),
             ('blank', 'ps1', 'ps3', 'ps4', 'ps6', 'ps7', 'ps8', 'ps16')),
    'ps16': (('blank', 'ps6', 'ps15'),
             ('blank', 'ps1', 'ps3')),
    'ps17': (('blank', 'ps1', 'ps3', 'ps6', 'ps15', 'ps16', 'ps17', 'ps18'),
             ('ps1', 'ps2', 'ps8', 'ps16', 'ps17', 'ps18', 'ps19', 'blank')),
    'ps18': (('blank', 'ps6', 'ps15', 'ps16'),
             ('blank', 'ps5', 'ps16')),
    'ps19': (('blank', 'ps15', 'ps6'),
             ('blank', 'ps4', 'ps6')),
    'ps21': (('blank', 'ps1', 'ps3', 'ps4', 'ps5', 'ps15'),
             ('ps10', 'ps11', 'ps13')),
    'ps22': (('blank', 'ps1', 'ps3', 'ps4', 'ps5', 'ps15'),
             ('blank', 'ps1', 'ps3')),
    'ps25': (('blank', 'ps6'),
             ('ps12', 'ps13')),
    'ps26': (('ps7', 'ps8', 'ps9', 'ps11', 'blank', 'ps27'),
             ('ps14', 'ps15', 'ps27')),
    'ps27': (('ps7', 'ps8', 'ps9', 'ps10', 'ps12', 'ps13', 'ps14', 'ps26', 'blank'),
             ('ps26', 'ps14', 'ps15')),
    'ps28': (('ps7', 'ps8', 'ps9', 'ps10', 'ps12', 'ps13', 'ps14', 'blank'),
             ('blank', 'ps9', 'ps10', 'ps11', 'ps13', 'ps15')),
    'ps29': (('ps7', 'ps8', 'ps9', 'ps10', 'ps12', 'ps13', 'ps14'),
             ('blank', 'ps1', 'ps2', 'ps4', 'ps6', 'ps7', 'ps8', 'ps16')),
    'ps30': (('blank', 'ps6', 'ps7'),
             ('ps14', 'ps15', 'blank')),
    'ps31': (('blank', 'ps15', 'ps6'),
             ('blank', 'ps1', 'ps2')),
}


def remove_ineligible_pos(df,identify_using,iteration):
    """Drop possibilities that are incompatible with the neighbouring lines.

    For every row whose ``identify_using`` cell holds more than one
    ';'-separated possibility, each possibility is checked against
    ``_PS_NEIGHBOUR_RULES`` using the possibilities of the row directly
    before and directly after it (labels ``index - 1`` / ``index + 1``).
    Possibilities listed in the row's ``ps_not_to_remove`` cell are put back
    at the front if the check removed them.  The survivors are then ordered
    by decreasing weight, the weight of possibility ``psN`` being read from
    the column named ``psN`` on the same row; a survivor whose weight column
    is missing or not convertible to ``int`` is left out of the final string.

    Columns written (the frame is modified in place):
        * ``Identification_Status_ineligible_removed`` - resulting
          possibilities (copied unchanged for rows with one possibility).
        * ``Identification_Status_with_weights`` - ``pos-weight`` pairs.
        * ``CountofPossibilities_afterIneligibleRemoved`` and
          ``CountofEligiblePossibilities_afterIteration<iteration>`` - number
          of possibilities left after the rule check (before the weight
          lookup can drop any).
        * ``When_Identified`` - set to ``'RemovingIneligiblePossibilities'``
          when exactly one possibility is left.

    Args:
        df: line table; assumed to use consecutive integer index labels
            (neighbours are addressed as ``index - 1`` / ``index + 1``).
        identify_using: name of the column holding the current possibilities.
        iteration: iteration number, only used to name the per-iteration
            count column.

    Returns:
        ``(df, new_lines_identified, pos_decreased, count_lines_identified,
        total_pos)`` where ``total_pos`` is the number of possibilities over
        all rows after this pass and ``pos_decreased`` tells whether that is
        lower than before the pass.
    """

    def is_eligible(pos, pvs_line_pos, nxt_line_pos, first_line_flag, last_line_flag):
        rule = _PS_NEIGHBOUR_RULES.get(pos)
        if rule is None:
            # Unknown labels are kept as they are; an empty label is dropped.
            return bool(pos)
        prev_allowed, next_allowed = rule
        prev_ok = first_line_flag or any(ps in pvs_line_pos for ps in prev_allowed)
        next_ok = last_line_flag or any(ps in nxt_line_pos for ps in next_allowed)
        return prev_ok and next_ok

    count_lines_identified = 0
    pos_count_column_name = 'CountofEligiblePossibilities_afterIteration' + str(iteration)
    # Explicit object dtype so the blank column can later hold integer counts.
    df[pos_count_column_name] = pd.Series('', index=df.index, dtype=object)
    new_lines_identified = False
    total_pos_before = 0
    total_pos = 0

    has_rows = len(df.index) > 0
    first_label = df.index[0] if has_rows else None
    last_label = df.index[-1] if has_rows else None

    for index in df.index:
        line_pos = df.at[index, identify_using].split(";")
        total_pos_before += len(line_pos)
        line_not_identified = len(line_pos) > 1
        print (index,line_not_identified)

        if not line_not_identified:
            # Already settled: carry the value over untouched.
            df.at[index, 'Identification_Status_ineligible_removed'] = df.at[index, identify_using]
            total_pos += 1
            continue

        # A row can be first and last at once (single-row frame); a missing
        # neighbour is represented by an empty list plus the matching flag.
        first_line_flag = bool(index == first_label)
        last_line_flag = bool(index == last_label)
        pvs_line_pos = [] if first_line_flag else df.at[index - 1, identify_using].split(";")
        nxt_line_pos = [] if last_line_flag else df.at[index + 1, identify_using].split(";")

        print('\n')
        print (index)
        print(pvs_line_pos)
        print(nxt_line_pos)
        print("current possibilities",line_pos)

        line_eligible_pos = [
            pos for pos in line_pos
            if is_eligible(pos, pvs_line_pos, nxt_line_pos, first_line_flag, last_line_flag)
        ]
        print("eligible possibilities",line_eligible_pos)

        # Put back the possibilities that must never be removed.  The column
        # may be absent (KeyError) or hold a non-string such as NaN
        # (AttributeError); both mean "nothing to protect".
        try:
            pos_not_to_remove = df.at[index, 'ps_not_to_remove'].split(";")
        except (KeyError, AttributeError):
            pos_not_to_remove = []
        for ps in pos_not_to_remove:
            # ''.split(';') yields [''] - skip it so a blank cell does not
            # add an empty possibility and inflate the counts.
            if ps and ps not in line_eligible_pos:
                line_eligible_pos.insert(0,ps)

        print (";".join(line_pos))
        print (";".join(line_eligible_pos))

        eligible_count = len(line_eligible_pos)
        df.at[index, 'CountofPossibilities_afterIneligibleRemoved'] = eligible_count
        ## write the count of possibilities to a column, one column per iteration
        df.at[index, pos_count_column_name] = eligible_count
        total_pos += eligible_count
        if eligible_count == 1:
            count_lines_identified += 1
            new_lines_identified = True
            df.at[index, 'When_Identified'] = 'RemovingIneligiblePossibilities'

        ## attach the weight to every surviving possibility
        weighted = []
        for pos in line_eligible_pos:
            try:
                wt = df.at[index, pos]
            except KeyError:
                print("could not find weight for pos ",pos," at index ",index)
                continue
            try:
                wt = int(wt)
            except (TypeError, ValueError, OverflowError):
                print("could not convert wt to int for pos ",pos," at index ",index)
                continue
            weighted.append((pos, wt))

        # Sort on the numeric weight itself (stable, highest first) rather
        # than re-parsing the "pos-weight" text, which breaks on negatives.
        weighted.sort(key=lambda item: item[1], reverse=True)

        pos_with_weights = [pos + '-' + str(wt) for pos, wt in weighted]
        print(pos_with_weights)
        df.at[index, 'Identification_Status_with_weights'] = ';'.join(pos_with_weights)

        ## same possibilities without the weights, still in decreasing weight order
        line_pos_string = ';'.join(pos for pos, _ in weighted)
        print(line_pos_string)
        df.at[index, 'Identification_Status_ineligible_removed'] = line_pos_string

    pos_decreased = (total_pos_before - total_pos) > 0

    print(total_pos_before,total_pos)
    return df,new_lines_identified,pos_decreased,count_lines_identified,total_pos


def do_while_pnnbl_ineligible(df):
    """Alternate the pnbl/nnbl pass and the ineligible-removal pass until stable.

    Each round of the outer loop:
      1. calls ``identify_using_pnbl_nnbl`` repeatedly for as long as it
         reports newly identified lines or fewer possibilities;
      2. calls ``remove_ineligible_pos`` repeatedly under the same condition,
         reading ``Identification_Status`` on the first call and
         ``Identification_Status_ineligible_removed`` afterwards;
      3. copies ``Identification_Status_ineligible_removed`` back into
         ``Identification_Status``.
    Another round is started only when step 2 ended with fewer possibilities
    (summed over all rows) than ``Identification_Status`` held before it.

    Helper columns that do not exist yet are created as blank columns.

    Args:
        df: line table; must contain ``Identification_Status``.

    Returns:
        The updated DataFrame.
    """

    def to_numeric_where_possible(column):
        # Same outcome as the deprecated pd.to_numeric(errors='ignore'):
        # a column that cannot be parsed is returned unchanged.
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    def load_weight_sheet(file_name):
        sheet = pd.read_csv(os.path.join(mypath, file_name),
                            index_col='Possibilities', keep_default_na=False)
        return sheet.head(34).apply(to_numeric_where_possible)

    def count_possibilities(frame, column):
        return sum(len(frame.at[index, column].split(";")) for index in frame.index)

    # NOTE(review): the two weight sheets are loaded here but never passed on
    # by this function - confirm how identify_using_pnbl_nnbl obtains them.
    # The reads are kept so a missing or malformed sheet still fails here.
    pnbl_df = load_weight_sheet('pnbl_weights.csv')
    nnbl_df = load_weight_sheet('nnbl_weights.csv')

    helper_columns = (
        'Identification_Status_with_weights',
        'pnbl_identified_as',
        'pos_using_pnbl',
        'nnbl_identified_as',
        'pos_using_nnbl',
        'pos_using_pnbl_nnbl',
    )

    total_pos_initial = count_possibilities(df, 'Identification_Status')
    total_pos_after = 0

    run_again = True
    while run_again:
        ## run the identification using pnbl_nnbl till no new lines get identified
        new_lines_identified = True
        pos_decreased = False
        iteration = 1
        line_nos_identified = []
        count_total = 0

        if 'Identification_Status' in df.columns:
            identify_using = 'Identification_Status'
        else:
            identify_using = 'stage-1_output'

        for column in helper_columns:
            if column not in df:
                df[column] = ''

        while new_lines_identified or pos_decreased:
            print("Identifying lines using pnbl_nnbl ")
            print("using:",identify_using)
            (df, new_lines_identified, identify_using, count,
             line_nos_identified_iteration, pos_decreased) = identify_using_pnbl_nnbl(df,identify_using,iteration)

            print("New lines identified in Iteration",iteration,": ",count)
            iteration += 1
            count_total += count
            line_nos_identified.append(line_nos_identified_iteration)
            print("lines identified in iteration",line_nos_identified)

        print(df['Identification_Status'].value_counts())

        print ("Total new lines identified in pnbl nnbl after all iteration:",count_total)
        print ("line nos identified in all iterations",line_nos_identified)

        ## run the identification by eliminating possibilities till nothing changes
        new_lines_identified = True
        pos_decreased = True
        iteration = 1
        count_total = 0
        total_pos_after = 0
        identify_using = 'Identification_Status'

        if 'Identification_Status_ineligible_removed' not in df.columns:
            df['Identification_Status_ineligible_removed'] = ''
            # This column later receives integer counts, so create it with an
            # explicit object dtype instead of an inferred string dtype.
            df['CountofPossibilities_afterIneligibleRemoved'] = pd.Series('', index=df.index, dtype=object)

        total_pos_start = count_possibilities(df, 'Identification_Status')

        while new_lines_identified or pos_decreased:
            print("\n Identifying lines using eliminating ineligible possibilities ")
            print("using:",identify_using)
            (df, new_lines_identified, pos_decreased, count,
             total_pos_after) = remove_ineligible_pos(df,identify_using,iteration)

            print("New lines identified in Iteration",iteration,": ",count)
            # From the second call on, work on the output of the previous call.
            identify_using = 'Identification_Status_ineligible_removed'
            iteration += 1
            count_total += count

        print ("Total new lines identified by eliminating ineligible possibilities after all iteration:",count_total)

        ## copy over column
        df['Identification_Status'] = df['Identification_Status_ineligible_removed']
        print(df['Identification_Status'].value_counts())

        print(total_pos_start,total_pos_after,iteration)

        ## run both passes again only if the ineligible pass removed possibilities
        run_again = total_pos_start > total_pos_after

    print(total_pos_initial,total_pos_after)
    return df

def examine_same_content_lines(df):
    """Resolve unidentified lines whose stripped text occurs more than once.

    The stripped ``data`` of all rows with ``isIdentified == 'No'`` is
    counted; texts seen more than once are candidates.  A row is updated when
    all of the following hold:
      * it is not marked ``isIdentified == 'Yes'``;
      * ``'ps7'`` is one of its current possibilities;
      * its stripped ``data`` is one of the repeated texts;
      * the row taken as previous non-blank line (``index - 2`` when ``plb``
        is ``'Y'``, otherwise ``index - 1``) is identified as exactly
        ``'ps15'`` or ``'ps6'``.
    Such a row becomes ``'ps7'`` (and is marked identified) when its
    ``parenthetical`` cell is ``'Absent'``; otherwise its possibilities are
    replaced by ``'ps8;ps25'``.

    Args:
        df: line table with integer index labels; modified in place.

    Returns:
        The same DataFrame.
    """
    # Work on a derived Series so nothing is assigned onto a filtered slice.
    unidentified_text = df.loc[df['isIdentified'] == 'No', 'data'].str.strip()
    occurrences = unidentified_text.value_counts()
    repeated_texts = list(occurrences[occurrences > 1].index)
    for pos_sp in repeated_texts:
        print (pos_sp)
    repeated_lookup = set(repeated_texts)

    if not repeated_lookup:
        return df

    # One pass is enough: a row matches at most one text (its own), and a
    # row updated here always lists 'ps7' or 'ps8', so it can never be the
    # 'ps15'/'ps6' predecessor that another row is checked against.
    for index in df.index:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue
        if 'ps7' not in df.at[index, 'Identification_Status'].split(";"):
            continue

        ## locate the previous non-blank line
        prev_line_blank = df.at[index, 'plb'] == 'Y'
        pnbl = index - 2 if prev_line_blank else index - 1
        if pnbl < 0:
            print("start of script \n")
            continue

        if df.at[index, 'data'].strip() not in repeated_lookup:
            continue
        # The original compared against ('ps15' or 'ps6'), which evaluates
        # to 'ps15' only; both labels are accepted here.
        if df.at[pnbl, 'Identification_Status'] not in ('ps15', 'ps6'):
            continue

        print(index)
        try:
            print(df.at[pnbl, 'data'])
        except Exception:
            # best-effort debug output only (e.g. console encoding problems)
            pass
        print(df.at[pnbl, 'Identification_Status'])

        if df.at[index, 'parenthetical'] == 'Absent':
            # The original used '==' here, so the status was never written.
            df.at[index, 'Identification_Status'] = 'ps7'
            df.at[index, 'isIdentified'] = 'Yes'
            df.at[index, 'When_Identified'] = 'ExaminingSameContentLines'
        else:
            df.at[index, 'Identification_Status'] = 'ps8;ps25'

    return df

def examine_action_possibilities_part1(df):
    """Mark an unidentified line as 'ps6' from its indent and its neighbours.

    Rows ``df.index[2:-2]`` are scanned (so that ``index - 2`` and
    ``index + 2`` always exist).  A row not marked ``isIdentified == 'Yes'``
    is set to ``'ps6'`` when:
      * its indent (``ssc``) lies between 15 and 25 inclusive;
      * the previous non-blank line has exactly one possibility, which is
        ``'ps4'`` or ``'ps5'``, and the same indent as the current line;
      * the first possibility of the next non-blank line is ``'ps7'``.
    The previous/next non-blank line is the adjacent row when ``plb``/``nlb``
    is ``'N'`` and the row two positions away otherwise.

    ``When_Identified`` is set to ``'ExaminingActionPossibilities'`` on the
    rows changed; ``isIdentified`` is left untouched by this function.

    A former ps4/ps5 rule set that only existed as commented-out code in this
    function has been dropped; consult version control if it is needed.

    Args:
        df: line table with integer index labels; modified in place.

    Returns:
        The same DataFrame.
    """
    # loop through the lines and check the line's possibility of being action
    for index in df.index[2:-2]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue

        print("unidentified line index is",index)
        cur_line_indent = df.at[index, 'ssc']

        # step over a single blank line when the direct neighbour is blank
        next_nbl = index + (1 if df.at[index, 'nlb'] == 'N' else 2)
        prev_nbl = index - (1 if df.at[index, 'plb'] == 'N' else 2)

        next_nbl_line_pos = df.at[next_nbl, 'Identification_Status'].split(";")
        prev_nbl_line_pos = df.at[prev_nbl, 'Identification_Status'].split(";")
        prev_nbl_line_indent = df.at[prev_nbl, 'ssc']

        if not (cur_line_indent >= 15 and cur_line_indent <= 25):
            continue

        ## check for ps6
        prev_is_ps4_or_ps5 = (
            len(prev_nbl_line_pos) == 1 and prev_nbl_line_pos[0] in ('ps4', 'ps5')
        )
        if (prev_is_ps4_or_ps5
                and cur_line_indent == prev_nbl_line_indent
                and next_nbl_line_pos[0] == 'ps7'):
            cur_line_new_pos = 'ps6'
            df.at[index, 'Identification_Status'] = cur_line_new_pos
            df.at[index, 'When_Identified'] = 'ExaminingActionPossibilities'
            try:
                print(df.at[index, 'data'])
            except Exception:
                # best-effort debug output only
                pass
            print(cur_line_new_pos)
            print("\n")

    return df

def examine_action_possibilities_part2(df):
    """Identify 'ps6' and 'ps5' lines from already identified neighbours.

    Both passes scan rows ``df.index[2:-2]`` and look at the previous/next
    non-blank line, i.e. the adjacent row when ``plb``/``nlb`` is ``'N'`` and
    the row two positions away otherwise.

    Pass 1 (rows not marked ``isIdentified == 'Yes'`` and whose first
    possibility is not ``'ps1'``): the row becomes ``'ps6'`` when the next
    non-blank line is identified as ``'ps7'`` and the previous one is
    identified as ``'ps1'``, ``'ps3'``, ``'ps6'`` or ``'ps15'``.

    Pass 2 (rows that still have more than one possibility): the row becomes
    ``'ps5'`` when the previous non-blank line is identified as ``'ps4'`` or
    ``'ps5'`` and the next one as ``'ps5'`` or ``'ps6'``.

    "Identified as X" means the neighbour's possibility list is exactly
    ``[X]``.  Rows changed get ``isIdentified = 'Yes'`` and
    ``When_Identified = 'ExaminingActionPossibilitiesAfterIneligible'``.
    Rows are updated as the scan proceeds, so a row resolved earlier in a
    pass can serve as the identified neighbour of a later row.

    Args:
        df: line table with integer index labels; modified in place.

    Returns:
        The same DataFrame.
    """

    def neighbour_possibilities(index):
        # possibilities of the previous and the next non-blank line
        prev_nbl = index - (1 if df.at[index, 'plb'] == 'N' else 2)
        next_nbl = index + (1 if df.at[index, 'nlb'] == 'N' else 2)
        return (df.at[prev_nbl, 'Identification_Status'].split(";"),
                df.at[next_nbl, 'Identification_Status'].split(";"))

    def identified_as(possibilities, labels):
        return len(possibilities) == 1 and possibilities[0] in labels

    def mark_identified(index, label):
        df.at[index, 'Identification_Status'] = label
        df.at[index, 'When_Identified'] = 'ExaminingActionPossibilitiesAfterIneligible'
        df.at[index, 'isIdentified'] = 'Yes'

    # loop through the lines and check the line's possibility of being action
    for index in df.index[2:-2]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue
        if df.at[index, 'Identification_Status'].split(";")[0] == 'ps1':
            continue

        pnbl_pos, nnbl_pos = neighbour_possibilities(index)

        ## declare ps6 if nnbl is ps7 and pnbl is ps1, ps3, ps6 or ps15
        if (identified_as(nnbl_pos, ('ps7',))
                and identified_as(pnbl_pos, ('ps1', 'ps3', 'ps6', 'ps15'))):
            print("Identifying line as ps6 as before speaker and after 1,3,6, 15",)
            try:
                print(df.at[index, 'line_no'] , df.at[index, 'data'])
            except Exception:
                # best-effort debug output only
                pass
            mark_identified(index, 'ps6')

    # loop through to examine for ps5
    for index in df.index[2:-2]:
        status = df.at[index, 'Identification_Status']
        if status == 'blank':
            continue
        if len(status.split(";")) == 1:
            continue

        pnbl_pos, nnbl_pos = neighbour_possibilities(index)

        ## declare ps5 if prev is ps4/ps5 and next is ps5/ps6.
        ## The original tested the possibility *list* for membership in a
        ## tuple of strings, which is always False, so this rule never fired.
        if (identified_as(pnbl_pos, ('ps4', 'ps5'))
                and identified_as(nnbl_pos, ('ps5', 'ps6'))):
            print("Identifying line as ps5 as between 4,5 and 5,6")
            mark_identified(index, 'ps5')

    return df

def examine_same_indent_bunch(df):
    """Resolve bunches of equally indented, still-ambiguous lines.

    A line is "ambiguous" while its ``Identification_Status`` holds several
    ``;``-separated possibilities.  Starting from each ambiguous line, the
    following non-blank lines are collected into a bunch for as long as their
    ``ssc`` (indent) equals the starting line's indent.  When the non-blank
    line before the bunch is exactly ``'ps15'`` and the non-blank line after
    it is exactly ``'ps7'``:

    * CASE A2 - a one-line bunch is identified as ``'ps6'`` (unless its top
      possibility is ``'ps1'``);
    * CASE A1 - in a longer bunch the last line is identified as ``'ps6'``
      (unless ``'ps15'`` is among its possibilities) and every other line of
      the bunch keeps only its slugline/action/transition possibilities.

    The total number of possibilities before and after the pass is printed.

    Args:
        df: DataFrame with columns ``Identification_Status``, ``isIdentified``,
            ``When_Identified``, ``ssc``, ``plb``, ``nlb`` and ``data``.
            The neighbour arithmetic (``index + 1``, ``index - 2`` ...)
            assumes a contiguous integer index starting at 0.

    Returns:
        The same DataFrame object, updated in place.
    """
    # Possibilities that survive the CASE A1 clean-up
    # (slugline, action and transition codes).
    ps_not_to_remove = ('ps1', 'ps2', 'ps3', 'ps4', 'ps5', 'ps6', 'ps16', 'ps18', 'ps19')

    def _echo(*args):
        # Script text may not be encodable on the current console; a debug
        # print must never abort the identification pass.
        try:
            print(*args)
        except Exception:
            pass

    def _possibilities(label):
        return df.at[label, 'Identification_Status'].split(";")

    def _total_possibilities():
        return sum(len(_possibilities(label)) for label in df.index)

    def _identify_as_ps6(label):
        # Single-step .at writes: chained ``df[col][label] = ...`` is not
        # guaranteed to reach the frame (and never does under Copy-on-Write).
        df.at[label, 'Identification_Status'] = 'ps6'
        df.at[label, 'isIdentified'] = 'Yes'
        df.at[label, 'When_Identified'] = 'ExaminingSameIndentBunch'
        _echo("ps6", df.at[label, 'data'])

    total_pos_before = _total_possibilities()

    last_label = df.index[-1] if len(df.index) else None
    index_iter = iter(df.index)

    for index in index_iter:
        line_pos = _possibilities(index)
        if len(line_pos) == 1:
            continue
        print(index)
        cur_indent = df.at[index, 'ssc']

        # Too close to the end of the script to look two lines ahead.
        if index + 2 > last_label:
            break

        # First non-blank follower of the current line.
        j = index + 2 if df.at[index, 'nlb'] == 'Y' else index + 1
        next_nbl_indent = df.at[j, 'ssc']
        # NOTE(review): this flag is evaluated for the first follower only and
        # is never refreshed while the bunch grows; kept as in the original.
        nbl_identified = len(_possibilities(j)) == 1

        start_index = index
        rev_index = index
        nbl_lines_count = 1

        print("lines with same indent")
        _echo(df.at[index, 'data'])

        bunch_index = [start_index]

        ## get the number of lines with same indent
        while cur_indent == next_nbl_indent and not nbl_identified:
            nbl_lines_count += 1
            _echo(df.at[j, 'data'])

            bunch_index.append(j)
            rev_index = j

            if j + 2 >= last_label:
                break

            j = j + 2 if df.at[j, 'nlb'] == 'Y' else j + 1
            next_nbl_indent = df.at[j, 'ssc']

        print(nbl_lines_count)

        ## preceded by: non-blank line before the bunch
        pnbl = start_index - 2 if df.at[index, 'plb'] == 'Y' else start_index - 1
        if pnbl < 0:
            # Skipping here also skips the iterator advance below, exactly as
            # the original did.
            print("start of script \n")
            continue

        print("preceeded by", df.at[pnbl, 'Identification_Status'])
        _echo(df.at[pnbl, 'data'])

        ## followed by: non-blank line after the bunch
        nnbl = rev_index + 2 if df.at[rev_index, 'nlb'] == 'Y' else rev_index + 1
        if nnbl > last_label:
            print("end of script \n")
            continue

        _echo(df.at[nnbl, 'data'])
        print("followed by", df.at[nnbl, 'Identification_Status'])

        print("\n")

        if df.at[pnbl, 'Identification_Status'] == 'ps15' and df.at[nnbl, 'Identification_Status'] == 'ps7':
            last_line_pos = _possibilities(rev_index)

            if nbl_lines_count == 1 and len(last_line_pos) > 1:
                if line_pos[0] == 'ps1':
                    continue
                print("CASE A2")
                # single line is ps6
                _identify_as_ps6(rev_index)

            elif nbl_lines_count > 1 and len(last_line_pos) > 1 and 'ps15' not in last_line_pos:
                print("CASE A1")
                # last line is ps6
                _identify_as_ps6(rev_index)

                # rest of the bunch (everything but the last line): drop every
                # possibility other than slugline, transition and action
                for k in bunch_index[:-1]:
                    cur_line_pos = _possibilities(k)
                    _echo(cur_line_pos)
                    df.at[k, 'Identification_Status'] = ";".join(
                        pos for pos in cur_line_pos if pos in ps_not_to_remove
                    )
                    df.at[k, 'When_Identified'] = 'ExaminingSameIndentBunch'
                    _echo(df.at[k, 'Identification_Status'], df.at[k, 'data'])

        # Move the outer iterator past the lines consumed by the bunch; the
        # default keeps an exhausted iterator from leaking StopIteration.
        for _ in range(start_index, rev_index):
            next(index_iter, None)

    total_pos_after = _total_possibilities()

    print(total_pos_before, total_pos_after)

    return df

def examine_relative_indent(df):
    """Identify a line as ``'ps7'`` from a strictly decreasing indent staircase.

    For every line that still has several ``;``-separated possibilities in
    ``Identification_Status``, look at the next non-blank line and the
    non-blank line after that.  The line is identified as ``'ps7'`` when

    * its indent (``ssc``) is greater than the next line's, which in turn is
      greater than the one after it,
    * none of the three lines has a parenthetical (``parenthetical`` is
      ``'Absent'``), and
    * ``'ps7'`` is one of its possibilities but not one of the next line's.

    Lines without a previous non-blank line, or without two following
    non-blank lines inside the frame, are left untouched.  The total number
    of possibilities before and after the pass is printed.

    Args:
        df: DataFrame with columns ``Identification_Status``, ``isIdentified``,
            ``When_Identified``, ``ssc``, ``plb``, ``nlb``, ``parenthetical``
            and ``data``.  The neighbour arithmetic assumes a contiguous
            integer index starting at 0.

    Returns:
        The same DataFrame object, updated in place.
    """
    def _echo(*args):
        # Script text may not be encodable on the current console; a debug
        # print must never abort the identification pass.
        try:
            print(*args)
        except Exception:
            pass

    def _possibilities(label):
        return df.at[label, 'Identification_Status'].split(";")

    def _total_possibilities():
        return sum(len(_possibilities(label)) for label in df.index)

    def _no_parenthetical(label):
        return df.at[label, 'parenthetical'] == 'Absent'

    total_pos_before = _total_possibilities()

    last_label = df.index[-1] if len(df.index) else None

    for index in df.index:
        line_pos = _possibilities(index)
        if len(line_pos) == 1:
            continue

        ## preceded by: the line must have a previous non-blank line
        back = 2 if df.at[index, 'plb'] == 'Y' else 1
        if index - back < 0:
            continue

        ## followed by: next non-blank line
        ahead = 2 if df.at[index, 'nlb'] == 'Y' else 1
        nnbl = index + ahead
        if nnbl > last_label:
            if ahead == 2:
                print("end of script \n")
            continue

        ## followed followed by: the non-blank line after that
        ahead = 2 if df.at[nnbl, 'nlb'] == 'Y' else 1
        nnnbl = nnbl + ahead
        if nnnbl > last_label:
            if ahead == 2:
                print("end of script \n")
            continue

        cur_indent = df.at[index, 'ssc']
        nnbl_indent = df.at[nnbl, 'ssc']
        nnnbl_indent = df.at[nnnbl, 'ssc']

        try:
            staircase = cur_indent > nnbl_indent and nnbl_indent > nnnbl_indent
        except TypeError:
            # Indents that cannot be ordered (e.g. an empty string next to a
            # number) simply disqualify the line.
            continue

        if not (staircase and _no_parenthetical(index)
                and _no_parenthetical(nnbl) and _no_parenthetical(nnnbl)):
            continue

        _echo(df.at[index, 'data'])
        _echo("current possibility", line_pos)
        if 'ps7' in line_pos and 'ps7' not in _possibilities(nnbl):
            print("Identifying as ps7")
            # Single-step .at writes: chained ``df[col][index] = ...`` is not
            # guaranteed to reach the frame (and never does under
            # Copy-on-Write).
            df.at[index, 'Identification_Status'] = 'ps7'
            df.at[index, 'When_Identified'] = 'ExaminingRelativeIndent'
            df.at[index, 'isIdentified'] = 'Yes'
            print("\n")

    total_pos_after = _total_possibilities()

    print(total_pos_before, total_pos_after)

    return df

def examine_pos_sp_indent(df,csv_removed_space_between_words,csv_pnnbl_ineligble_after_relative_indent):
    """Identify short lines sitting at the likely speaker indent as ``'ps7'``.

    The likely speaker indent is derived from the two CSV snapshots: they are
    joined on ``line_no``, the ``ssc`` values of the lines whose
    ``Identification_Status`` is exactly ``'ps7'`` are counted, and the largest
    indent among the five most frequent ones is used.  When no such line
    exists the indent falls back to 200.

    Every not-yet-identified line of ``df`` whose indent lies within 3 columns
    of that value is identified as ``'ps7'`` when ``'ps7'`` occurs in its
    possibilities, it has no parenthetical and it is at most two words long.

    Args:
        df: DataFrame with columns ``Identification_Status``, ``isIdentified``,
            ``When_Identified``, ``ssc``, ``parenthetical`` and ``data``.
        csv_removed_space_between_words: path or file-like object of a CSV
            providing the ``line_no`` and ``ssc`` columns.
        csv_pnnbl_ineligble_after_relative_indent: path or file-like object of
            a CSV providing ``line_no``, ``Identification_Status`` and
            ``isIdentified``.

    Returns:
        The same DataFrame object, updated in place.
    """
    df_indents = pd.read_csv(csv_removed_space_between_words, usecols=['line_no', 'ssc'])
    identification_status = pd.read_csv(
        csv_pnnbl_ineligble_after_relative_indent,
        usecols=['line_no', 'Identification_Status', 'isIdentified'],
    )
    identification_status['line_no'] = identification_status['line_no'].astype(float)
    df_indents = df_indents.merge(identification_status, how='inner', on='line_no')

    # Five most frequent indents among lines already identified as ps7.
    sp_indents_df = (
        df_indents.loc[df_indents['Identification_Status'] == 'ps7', 'ssc']
        .value_counts()
        .sort_values(ascending=False)
        .head(5)
    )
    sp_indents_list = sorted(sp_indents_df.index.values.tolist())

    # Largest of those indents; 200 when nothing has been identified as ps7.
    pos_sp_indent = sp_indents_list[-1] if sp_indents_list else 200

    margin = 3
    for index in df.index:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue
        cur_indent = df.at[index, 'ssc']
        if not (pos_sp_indent - margin <= cur_indent <= pos_sp_indent + margin):
            continue

        data = df.at[index, 'data']
        word_count = len(data.split())
        if ('ps7' in df.at[index, 'Identification_Status']
                and df.at[index, 'parenthetical'] == 'Absent'
                and word_count <= 2):
            try:
                print(index, data)
            except Exception:
                # Script text may not be encodable on the current console.
                pass
            print("Identifying as speaker")
            # Single-step .at writes: chained ``df[col][index] = ...`` is not
            # guaranteed to reach the frame (and never does under
            # Copy-on-Write).
            df.at[index, 'Identification_Status'] = 'ps7'
            df.at[index, 'isIdentified'] = 'Yes'
            df.at[index, 'When_Identified'] = 'ExaminingPossibleSpeakerIndent'

    return df


def examine_action_middle_possibilities_using_pnnbl_top(df):
    """Walk the unidentified lines and gather their neighbours' possibilities.

    For every line except the first two and last two, the possibilities of
    the previous non-blank line, the next non-blank line and (best effort)
    the line before the previous one are looked up.  Nothing is written: the
    rule that consumed these values is currently disabled, so the frame is
    returned unchanged.

    The disabled rule declared a line ``'ps5'`` when its own top possibility
    was ``'ps5'``, the previous line's top was ``'ps4'``/``'ps5'`` and the
    next line's top was ``'ps5'``/``'ps6'``.  It skipped lines carrying a
    parenthetical, and lines whose previous or pre-previous neighbour had
    ``'ps16'`` (transition) among its top two possibilities.

    Args:
        df: DataFrame with columns ``Identification_Status``, ``isIdentified``,
            ``plb``, ``nlb``, ``line_no`` and ``data``.

    Returns:
        ``df`` itself, unmodified.
    """
    def possibilities_at(label):
        return df['Identification_Status'][label].split(";")

    # loop through to examine for ps5
    for index in df.index[2:-2]:
        if df['isIdentified'][index] == 'Yes':
            continue

        # previous non-blank line: one step back, or two when a blank line
        # sits in between
        back = 1 if df['plb'][index] == 'N' else 2
        pnbl_pos = possibilities_at(index - back)
        pnbl_index = index - back

        # next non-blank line
        ahead = 1 if df['nlb'][index] == 'N' else 2
        nnbl_pos = possibilities_at(index + ahead)

        # line before the previous one; it may not exist near the start of
        # the script, in which case the lookup failure is ignored
        try:
            further_back = 1 if df['plb'][pnbl_index] == 'N' else 2
            ppnbl_pos = possibilities_at(pnbl_index - further_back)
        except BaseException:
            # same reach as a bare ``except:``
            pass

        line_no = df['line_no'][index]
        data = df['data'][index]
        cur_line_pos = possibilities_at(index)

    return df


def examine_speaker_extension(df,audit_df):
    """Split lines that carry a speaker name plus trailing text into two rows.

    Two passes run over every line except the first two and last two, skipping
    lines whose ``isIdentified`` is ``'Yes'``:

    1. A line whose top possibility is ``'ps8'``, whose ``parenthetical`` is
       ``'PartMidEnd'``, whose next non-blank line has no parenthetical and
       which contains none of the extension markers (``O.S.``, ``V.O.``,
       ``CONT'D``, ``VOICE``) is cut at its first ``(``: the text before it
       becomes a ``'ps7'`` speaker row, the rest a new ``'ps10'`` row.
    2. Every line whose status is not exactly ``'ps7'``/``'ps8'``/``'ps9'`` is
       searched for each already known speaker (stripped ``data`` of all
       ``'ps7'`` rows).  When only whitespace precedes the name and no
       extension marker is present, the line is split after the name if the
       first following character is

       * ``(`` (and ``parenthetical`` is ``'PartMidEnd'``): rest -> ``'ps10'``;
       * ``:`` (and the name is not in the skip list): rest keeps the line's
         original possibilities;
       * ``‘`` or ``"``: rest is wrapped in parentheses -> ``'ps10'``.

    Each split inserts the new row right after the speaker row, gives it a
    ``line_no`` between the two neighbouring line numbers and records the
    split in ``audit_df``.

    NOTE(review): the loops iterate over the index as it was before any split;
    after a split the frame is re-indexed, so the remaining labels refer to
    rows shifted by one.  This is the original behaviour and is preserved.
    NOTE(review): split speaker rows get status ``'ps7'`` but their
    ``isIdentified`` flag is left untouched, as in the original.

    Args:
        df: DataFrame with columns ``Identification_Status``, ``isIdentified``,
            ``When_Identified``, ``parenthetical``, ``case``, ``plb``, ``nlb``,
            ``line_no`` and ``data``; assumed to have a contiguous integer
            index starting at 0.
        audit_df: DataFrame indexed by line number with columns
            ``line_removed`` and ``line_broken_into_multiple_lines``.  It is
            modified in place and not returned.

    Returns:
        The (possibly re-indexed) DataFrame with the inserted rows.
    """
    extn_list = ('O.S.', 'V.O.', "CONT'D", "CONT’D", 'VOICE')
    speaker_skip_list = ('MONTAGES', 'MUSICAL MONTAGES', 'MORNING', 'AT HOTEL', 'TV',
                         'ESSENTIALS', 'ESSENTIAL', 'LATER')

    def _echo(*args):
        # Script text may not be encodable on the current console; a debug
        # print must never abort the identification pass.
        try:
            print(*args)
        except Exception:
            pass

    def _has_extension(text):
        return any(extn in str(text) for extn in extn_list)

    def _split_line(frame, index, speaker_text, rest_text, rest_status, rest_parenthetical, audit_note):
        """Make row ``index`` a speaker row and insert ``rest_text`` as the row after it."""
        frame.at[index, 'data'] = speaker_text
        frame.at[index, 'parenthetical'] = 'Absent'
        frame.at[index, 'When_Identified'] = 'ExaminingSpeakerLines'
        frame.at[index, 'case'] = 'AllUpper'
        frame.at[index, 'Identification_Status'] = 'ps7'
        nlb = frame.at[index, 'nlb']
        frame.at[index, 'nlb'] = 'N'

        # New line number halfway to the next line; nudged once towards the
        # next line if that number is already taken in the audit trail.
        line_no = frame.at[index, 'line_no']
        next_line_no = frame.at[index + 1, 'line_no']
        new_line_no = (line_no + next_line_no) / 2
        if new_line_no in audit_df.index:
            new_line_no = (new_line_no + next_line_no) / 2

        # Single-step .loc writes: ``audit_df.loc[row][col] = ...`` assigns to
        # a temporary row object and may never reach audit_df.
        audit_df.loc[new_line_no] = ''
        audit_df.loc[new_line_no, 'line_removed'] = 'No'
        audit_df.loc[line_no, 'line_broken_into_multiple_lines'] = audit_note

        # A fractional label places the new row directly after ``index`` once
        # the frame is sorted; the index is then renumbered.
        new_label = index + 0.25
        frame.loc[new_label] = ''
        frame.loc[new_label, 'data'] = rest_text
        if rest_parenthetical is not None:
            frame.loc[new_label, 'parenthetical'] = rest_parenthetical
        frame.loc[new_label, 'When_Identified'] = 'ExaminingSpeakerLines'
        frame.loc[new_label, 'Identification_Status'] = rest_status
        frame.loc[new_label, 'case'] = ''
        frame.loc[new_label, 'plb'] = 'N'
        frame.loc[new_label, 'nlb'] = nlb
        frame.loc[new_label, 'line_no'] = new_line_no

        return frame.sort_index().reset_index(drop=True)

    # loop through to examine speaker extension top 'ps8'
    for index in df.index[2:-2]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue

        nnbl = index + 1 if df.at[index, 'nlb'] == 'N' else index + 2
        nnbl_par = df.at[nnbl, 'parenthetical']

        data = df.at[index, 'data']
        cur_line_pos = df.at[index, 'Identification_Status'].split(";")
        cur_line_par = df.at[index, 'parenthetical']

        ## only when the highest possibility is ps8
        if not (cur_line_pos[0] == 'ps8' and cur_line_par == 'PartMidEnd'
                and nnbl_par == 'Absent' and not _has_extension(data)):
            continue

        _echo(data)
        paren_at = data.find('(')
        if paren_at == -1:
            continue
        before_par = data[:paren_at]
        after_par = data[paren_at:]

        print("Separating Parenthetical")
        print("Identifying as speaker")
        print(index)
        _echo(before_par)
        df = _split_line(df, index, before_par, after_par, 'ps10', 'Complete',
                         'Separated Speaker and Parenthetical')
        print(df.at[index, 'Identification_Status'])
        _echo(after_par)
        print("identifying parenthetical")

    ## now examine the speakers having : or apostrophe after them and separate to new line

    speaker_list = df.loc[df['Identification_Status'] == 'ps7', 'data'].astype(str)
    # Stripped speaker names, de-duplicated in order of first appearance.
    unique_speaker_list = list(dict.fromkeys(elem.strip() for elem in speaker_list))

    _echo(unique_speaker_list)

    # Speaker names are matched literally: escaping keeps characters such as
    # '.', '(' or '?' in a name from being read as regex syntax.  The
    # case-sensitive pattern decides whether the speaker occurs at all, the
    # case-insensitive one locates the split position (as the original did).
    speaker_patterns = [
        (speaker, re.compile(re.escape(speaker)), re.compile(re.escape(speaker), re.IGNORECASE))
        for speaker in unique_speaker_list
        if speaker
    ]

    for index in df.index[2:-2]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue

        status = df.at[index, 'Identification_Status']
        if status in ('ps7', 'ps8', 'ps9'):
            continue

        data = df.at[index, 'data']
        extn_found = _has_extension(data)

        for speaker, exact, loose in speaker_patterns:
            if exact.search(data) is None:
                continue

            # check if speaker is at start of line followed by something (like : apostrophe)
            print(index)
            found = loose.search(data)
            before_speaker = data[:found.start()]
            after_speaker = data[found.end():]
            print("speaker match found")
            _echo("data 4567:", data)
            _echo("speaker 4568:", speaker)
            _echo("before speaker:", before_speaker)
            _echo("after speaker:", after_speaker)

            # first non-blank character after the name ('' when there is none)
            char1_after_speaker = after_speaker.lstrip()[:1]
            _echo("char1_after_speaker 4579 :", char1_after_speaker)

            # every split requires the name to be preceded by whitespace only
            # and the line to be free of extension markers
            if not before_speaker.isspace() or extn_found:
                continue

            ## separate parenthetical if speaker is followed by parenthetical
            if char1_after_speaker == '(' and df.at[index, 'parenthetical'] == 'PartMidEnd':
                print("before speaker inside the if condition:", before_speaker)
                print("Seperating Parenthetical")
                print("Identifying speaker")
                print(index)
                print("identifying parenthetical")
                df = _split_line(df, index, before_speaker + speaker, after_speaker,
                                 'ps10', 'Complete',
                                 'Separated Speaker and Parenthetical')

            elif char1_after_speaker == ':' and speaker not in speaker_skip_list:
                print("before speaker in elif condition 4624:", before_speaker)
                print("Seperating : colon dialogue")
                print("Identifying speaker")
                print(index)
                print("possible dialogue")
                _echo(after_speaker)
                # the remainder keeps the line's possibilities; its
                # parenthetical cell stays empty
                df = _split_line(df, index, before_speaker + speaker, after_speaker,
                                 status, None,
                                 'Separated Speaker and Dialogue seperated by colon:')

            elif char1_after_speaker in ('‘', '"'):
                print("before speaker in seperating apostrophe:", before_speaker)
                print("Seperating apostrophe")
                print("Identifying speaker")
                print(index)
                print("identifying as parenthetical")
                # NOTE(review): the audit note below says "colon" although
                # this is the quote case; text kept in case it is consumed
                # downstream.
                df = _split_line(df, index, before_speaker + speaker,
                                 '(' + after_speaker.strip() + ')',
                                 'ps10', 'Complete',
                                 'Separated Speaker and Dialogue seperated by colon:')

            else:
                continue

            # The line is a 'ps7' speaker row now; no other speaker can split it.
            break

    return df


def examine_action_using_top2_part1(df):
    """Settle unidentified lines whose top possibility is 'ps4' (action beginning).

    Rows ``df.index[2:-2]`` are visited in order; rows with
    ``isIdentified == 'Yes'`` are skipped.  For every other row the top two
    possibilities of the neighbouring lines are inspected.  The neighbour is
    the adjacent row when ``plb`` / ``nlb`` is ``'N'``, otherwise the row two
    positions away (presumably skipping a single blank line).

    A row is settled as ``'ps4'`` (``When_Identified = 'UsingTop2PNNBL'``) when
    its top possibility is ``'ps4'``, the previous neighbour's top is not
    ``'ps4'`` and either

    * case 1: the next neighbour's top is not ``'ps7'``, the previous
      neighbour's top contains ``'ps6'``, ``nlb == 'N'`` and ``'ps7'`` is not
      among the row's own possibilities; or
    * case 2: the previous neighbour has no ``'ps5'`` in its top two, the
      row's ``ssc`` equals the next neighbour's ``ssc`` and the next
      neighbour has ``'ps5'`` or ``'ps6'`` in its top two.

    Rows are skipped outright when the next neighbour is exactly ``'ps13'`` or
    ``'ps15'`` (dialogue) or the previous neighbour's top is ``'ps8'``.

    Args:
        df: DataFrame with consecutive integer labels and the columns
            ``isIdentified``, ``ssc``, ``plb``, ``nlb``,
            ``Identification_Status``, ``When_Identified`` and ``data``.

    Returns:
        The same DataFrame, modified in place.
    """
    for index in df.index[2:-2]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue

        cur_indent = df.at[index, 'ssc']
        next_is_adjacent = df.at[index, 'nlb'] == 'N'

        # Neighbours are read live because earlier iterations may have
        # rewritten their Identification_Status.
        pnbl_index = index - 1 if df.at[index, 'plb'] == 'N' else index - 2
        nnbl_index = index + 1 if next_is_adjacent else index + 2
        pnbl_pos = df.at[pnbl_index, 'Identification_Status'].split(";")
        nnbl_pos = df.at[nnbl_index, 'Identification_Status'].split(";")
        nnbl_indent = df.at[nnbl_index, 'ssc']

        data = df.at[index, 'data']
        cur_line_pos = df.at[index, 'Identification_Status'].split(";")

        ## skip if next is dialogue
        if "".join(nnbl_pos) in ('ps13', 'ps15'):
            continue

        # str.split always yields at least one element, so only the second
        # entry needs a guard.
        pnbl_top2 = pnbl_pos[1] if len(pnbl_pos) > 1 else ''
        nnbl_top2 = nnbl_pos[1] if len(nnbl_pos) > 1 else ''

        ## ps4 identification made stricter if pnbl top is ps8
        if pnbl_pos[0] == 'ps8':
            continue

        ## examine ps4 = action beginning
        line_identified = False
        ps4_candidate = cur_line_pos[0] == 'ps4' and pnbl_pos[0] != 'ps4'

        if ps4_candidate and nnbl_pos[0] != 'ps7':
            try:
                print(data)
            except Exception:
                # best-effort debug output; never let it abort identification
                pass
            print(pnbl_pos[0], cur_line_pos[0], nnbl_pos[0])
            # NOTE: substring test on the previous neighbour's top possibility
            if 'ps6' in pnbl_pos[0] and next_is_adjacent and 'ps7' not in cur_line_pos:
                ## can make strict by indent also
                print('identifying as ps4 case 1 top 1')
                df.at[index, 'Identification_Status'] = 'ps4'
                df.at[index, 'When_Identified'] = 'UsingTop2PNNBL'
                line_identified = True
            else:
                print("ps6 not in previous")

        if ps4_candidate and not line_identified:
            print("checking for ps5/6 in next")
            if pnbl_pos[0] == 'ps5':
                print("skipping as previous top is ps5" )
                continue
            if pnbl_top2 and 'ps5' in pnbl_top2:
                print("skipping as previous top2 is ps5" )
                continue

            if cur_indent == nnbl_indent:
                if 'ps5' in nnbl_pos[0] or 'ps6' in nnbl_pos[0]:
                    reason = 'identifying as ps4 case 2 top1'
                elif nnbl_top2 and 'ps5' in nnbl_top2:
                    reason = 'identifying as ps4 case 2 top2 ps5'
                elif nnbl_top2 and 'ps6' in nnbl_top2:
                    reason = 'identifying as  ps4 case 2 top2 ps6'
                else:
                    reason = None

                if reason is not None:
                    try:
                        print(data, reason)
                    except Exception:
                        pass
                    df.at[index, 'Identification_Status'] = 'ps4'
                    df.at[index, 'When_Identified'] = 'UsingTop2PNNBL'
            else:
                print("current indent is not equal to next indent")

        print("\n")

    # NOTE: a second pass that settled 'ps6' (action end) from the previous
    # neighbour's top two used to follow here; it was disabled because it was
    # getting ps6 wrong.
    return df


def refine_action_possibilties(df):
    """Prune possibilities of unidentified lines using fully settled neighbours.

    Rows ``df.index[1:-1]`` are visited in order; rows with
    ``isIdentified == 'Yes'`` are skipped.  A neighbour counts only when its
    whole ``Identification_Status`` is a single code:

    * next neighbour is ``'ps7'``     -> drop ``'ps5'`` and ``'ps14'``
    * next neighbour is ``'ps4'``     -> drop ``'ps5'``
    * previous neighbour is ``'ps4'`` -> drop ``'ps3'`` and ``'ps7'``

    The neighbour is the adjacent row when ``plb`` / ``nlb`` is ``'N'``,
    otherwise the row two positions away; when that would fall on or beyond
    the first/last row the neighbour is treated as ``['blank']``.

    Args:
        df: DataFrame with consecutive integer labels and the columns
            ``isIdentified``, ``plb``, ``nlb``, ``Identification_Status``,
            ``line_no`` and ``data``.

    Returns:
        The same DataFrame, modified in place.
    """
    last_index = df.index[-1] if len(df.index) else None

    for index in df.index[1:-1]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue

        if index == 0:
            pnbl_pos = ['blank']
        elif df.at[index, 'plb'] == 'N':
            pnbl_pos = df.at[index - 1, 'Identification_Status'].split(";")
        elif index - 1 == 0:
            # previous line is blank and is also the first row
            # (the original assigned to a misspelt name, `pnpl_pos`, here)
            pnbl_pos = ['blank']
        else:
            pnbl_pos = df.at[index - 2, 'Identification_Status'].split(";")

        if index == last_index:
            nnbl_pos = ['blank']
        elif df.at[index, 'nlb'] == 'N':
            nnbl_pos = df.at[index + 1, 'Identification_Status'].split(";")
        elif index + 1 == last_index:
            nnbl_pos = ['blank']
        else:
            nnbl_pos = df.at[index + 2, 'Identification_Status'].split(";")

        line_no = df.at[index, 'line_no']
        data = df.at[index, 'data']
        # work on a copy so the pruning below is explicit
        line_new_pos = df.at[index, 'Identification_Status'].split(";")

        prev_status = "".join(pnbl_pos)
        next_status = "".join(nnbl_pos)

        if next_status == 'ps7':
            try:
                print(line_no, data)
            except Exception:
                # best-effort debug output
                pass
            print("remove ps5,14")
            for code in ('ps5', 'ps14'):
                if code in line_new_pos:
                    line_new_pos.remove(code)

        if next_status == 'ps4':
            try:
                print(line_no, data)
            except Exception:
                pass
            print("remove ps5")
            if 'ps5' in line_new_pos:
                line_new_pos.remove('ps5')

        if prev_status == 'ps4':
            try:
                print(line_no, data)
            except Exception:
                pass
            print("remove ps3 and 7")
            for code in ('ps3', 'ps7'):
                if code in line_new_pos:
                    line_new_pos.remove(code)

        # written back even when nothing was removed, as before
        df.at[index, 'Identification_Status'] = ";".join(line_new_pos)

    return df


def prep_pnnbl_eligible_csv(pnbl_eligibility_matrix,nnbl_eligibility_matrix):
    """Normalise the two eligibility matrices into CSVs under ``mypath``.

    Each input CSV is read with its first row skipped, its first column is
    renamed to ``'Description'`` and its second to ``'Possibilities'``, and
    the result is written (without the index) to ``pnbl_eligible_pos.csv`` /
    ``nnbl_eligible_pos.csv`` in ``mypath``.  Both written files are then read
    back with ``'Possibilities'`` as the index column; the frames obtained
    from that read are not used or returned.

    Args:
        pnbl_eligibility_matrix: path (or file-like) of the matrix for the
            previous non-blank line.
        nnbl_eligibility_matrix: path (or file-like) of the matrix for the
            next non-blank line.

    Returns:
        None.
    """
    out_dir = mypath
    # out_dir = os.getcwd()

    # 1) load both matrices before anything is written
    frames = [
        pd.read_csv(matrix, skiprows=[0])
        for matrix in (pnbl_eligibility_matrix, nnbl_eligibility_matrix)
    ]

    # 2) give the first two columns their canonical names
    for frame in frames:
        renames = {frame.columns[1]: 'Possibilities', frame.columns[0]: 'Description'}
        frame.rename(columns=renames, inplace=True)

    # 3) persist, then 4) read back keyed on 'Possibilities'
    out_paths = [
        os.path.join(out_dir, file_name)
        for file_name in ('pnbl_eligible_pos.csv', 'nnbl_eligible_pos.csv')
    ]
    for frame, out_path in zip(frames, out_paths):
        frame.to_csv(out_path, index=False)
    for out_path in out_paths:
        pd.read_csv(out_path, index_col=['Possibilities'])


def check_eligibility_using_identified_pnnbl(df):
    """Filter each unidentified line's possibilities by its settled neighbours.

    ``pnbl_eligible_pos.csv`` and ``nnbl_eligible_pos.csv`` are loaded from
    ``mypath``.  For every row that is not yet ``isIdentified == 'Yes'``:

    * if the previous neighbour has exactly one possibility (and is not the
      ``'blank'`` placeholder), only the possibilities whose row in the pnbl
      matrix has ``'yes'`` in that neighbour's column are kept;
    * the same filter is applied with the next neighbour and the nnbl matrix;
    * an empty result becomes the special term ``'ps17'``;
    * a single remaining possibility marks the row ``isIdentified = 'Yes'``.

    The neighbour is the adjacent row when ``plb`` / ``nlb`` is ``'N'``,
    otherwise the row two positions away; at the frame boundaries it is
    ``['blank']``.

    Args:
        df: DataFrame with consecutive integer labels and the columns
            ``isIdentified``, ``plb``, ``nlb``, ``Identification_Status``,
            ``line_no`` and ``data``.

    Returns:
        Tuple ``(df, pos_decreased, lines_identified)``: the frame (modified
        in place), whether the total possibility count went down during this
        pass, and how many rows were marked identified by this pass.
    """
    total_pos_before = 0
    total_pos_after = 0
    lines_identified = 0
    cur_dir = mypath
    pnbl_eligible_df = pd.read_csv(os.path.join(cur_dir,'pnbl_eligible_pos.csv'))
    nnbl_eligible_df = pd.read_csv(os.path.join(cur_dir,'nnbl_eligible_pos.csv'))

    last_index = df.index[-1] if len(df.index) else None

    for index in df.index:
        if df.at[index, 'isIdentified'] == 'Yes':
            # an identified line counts as one possibility on both sides
            total_pos_before += 1
            total_pos_after += 1
            print(total_pos_before,total_pos_after)
            continue

        line_no = df.at[index, 'line_no']
        data = df.at[index, 'data']
        cur_line_pos = df.at[index, 'Identification_Status'].split(";")
        if cur_line_pos[0] != '':
            total_pos_before += len(cur_line_pos)

        if index == 0:
            pnbl_pos = ['blank']
        elif df.at[index, 'plb'] == 'N':
            pnbl_pos = df.at[index - 1, 'Identification_Status'].split(";")
        elif index - 1 == 0:
            # previous line is blank and is also the first row
            # (the original assigned to a misspelt name, `pnpl_pos`, here)
            pnbl_pos = ['blank']
        else:
            pnbl_pos = df.at[index - 2, 'Identification_Status'].split(";")

        if index == last_index:
            nnbl_pos = ['blank']
        elif df.at[index, 'nlb'] == 'N':
            nnbl_pos = df.at[index + 1, 'Identification_Status'].split(";")
        elif index + 1 == last_index:
            nnbl_pos = ['blank']
        else:
            nnbl_pos = df.at[index + 2, 'Identification_Status'].split(";")

        line_new_pos = cur_line_pos
        try:
            print(line_no,data)
        except Exception:
            # best-effort debug output
            pass
        print("current line pos", cur_line_pos, df.at[index, 'Identification_Status'])
        try:
            print("previous line pos",pnbl_pos)
            print("next line pos",nnbl_pos)
        except Exception:
            pass

        if len(pnbl_pos) == 1 and pnbl_pos[0] != 'blank':
            print("pnbl is identified as ", pnbl_pos)
            ## keep only possibilities which can exist with this pnbl
            pnbl_eligible_pos = pnbl_eligible_df.loc[
                pnbl_eligible_df[pnbl_pos[0]] == 'yes', 'Possibilities'
            ].to_list()
            print("eligible possibilties as per pnbl",pnbl_eligible_pos)
            line_new_pos = [ps for ps in line_new_pos if ps in pnbl_eligible_pos]
            print("line new possibilities", line_new_pos)
        else:
            print("previous line not identified")

        if len(nnbl_pos) == 1 and nnbl_pos[0] != 'blank':
            print("nnbl is identified as ", nnbl_pos)
            ## keep only possibilities which can exist with this nnbl
            nnbl_eligible_pos = nnbl_eligible_df.loc[
                nnbl_eligible_df[nnbl_pos[0]] == 'yes', 'Possibilities'
            ].to_list()
            print("eligible possibilties as per nnbl",nnbl_eligible_pos)
            line_new_pos = [ps for ps in line_new_pos if ps in nnbl_eligible_pos]
            print("line new possibilities", line_new_pos)
        else:
            print("next line not identified")

        ## make null as special term
        if len(line_new_pos) == 0:
            print("making null possibility special term ps17")
            line_new_pos = ['ps17']

        if len(line_new_pos) == 1:
            df.at[index, 'isIdentified'] = 'Yes'
            lines_identified += 1
        df.at[index, 'Identification_Status'] = ";".join(line_new_pos)
        total_pos_after += len(line_new_pos)
        print(total_pos_before,total_pos_after)

    print(total_pos_before,total_pos_after)
    pos_decreased = total_pos_after < total_pos_before
    return df,pos_decreased,lines_identified


def do_while_examine_using_identified_pnnbl(df):
    """Run the neighbour-eligibility pass repeatedly until it stops shrinking.

    ``check_eligibility_using_identified_pnnbl`` is called at least once and
    then again for as long as it reports that the total number of
    possibilities decreased.  The pass counter and the running total of
    identified lines are printed after every pass and once more at the end.

    Args:
        df: DataFrame accepted by ``check_eligibility_using_identified_pnnbl``.

    Returns:
        The DataFrame returned by the final pass.
    """
    passes_run = 0
    identified_so_far = 0

    while True:
        passes_run += 1
        df, still_shrinking, newly_identified = check_eligibility_using_identified_pnnbl(df)
        identified_so_far += newly_identified
        print(passes_run, identified_so_far)
        if not still_shrinking:
            break

    print(passes_run, identified_so_far)
    return df


def start_top_identifications_part1(df):
    """Settle unidentified lines on their top possibility where context allows.

    First pass over ``df.index[1:-1]`` (rows with ``isIdentified == 'Yes'``
    are skipped):

    * A row whose top possibility is ``'ps1'``, ``'ps7'``, ``'ps8'`` or
      ``'ps9'`` is reduced to that top, as is a ``'ps6'`` / ``'ps16'`` top
      when the neighbour conditions in the code hold.  A ``'ps9'`` top with
      ``'ps7'`` second and ``parenthetical == 'Absent'`` is written as
      ``'ps7'``.  ``'ps2'`` / ``'ps3'`` / ``'ps30'`` are appended in the
      cases handled below.  ``When_Identified`` becomes
      ``'StartIdentifyingTopsPart1'``.
    * A multi-possibility ``'ps6'`` top is left untouched when it could also
      be ``'ps15'`` or ``'ps7'`` (see the individual guards).
    * A row with ``'ps5'`` as first or second possibility becomes ``'ps6'``
      when the previous neighbour's top is ``'ps4'``, the next line is blank
      (``nlb == 'Y'``) and both have the same ``ssc``.

    Second pass: rows still unidentified whose top is ``'ps8'`` and whose
    text contains one of the listed extension markers are written as
    ``'ps8'``.

    Fixes relative to the previous revision: the second possibility is no
    longer indexed on single-possibility rows (IndexError), the previous
    neighbour's top is compared with ``'ps14'`` (a list was compared with a
    string, which is never equal), and a leading blank line now yields the
    ``['blank']`` placeholder instead of an empty list.

    Args:
        df: DataFrame with consecutive integer labels and the columns
            ``isIdentified``, ``plb``, ``nlb``, ``ssc``, ``parenthetical``,
            ``Identification_Status``, ``When_Identified``, ``line_no`` and
            ``data``; ``case`` is used when present.

    Returns:
        The same DataFrame, modified in place.
    """
    stamp = 'StartIdentifyingTopsPart1'
    last_index = df.index[-1] if len(df.index) else None

    for index in df.index[1:-1]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue

        # previous neighbour: adjacent row, or two back across a blank line
        pnbl_index = index - 1
        if index == 0:
            pnbl_pos = ['blank']
        elif df.at[index, 'plb'] == 'N':
            pnbl_pos = df.at[index - 1, 'Identification_Status'].split(";")
        elif index - 1 == 0:
            # blank first row (the original assigned to a misspelt `pnpl_pos`)
            pnbl_pos = ['blank']
        else:
            pnbl_index = index - 2
            pnbl_pos = df.at[pnbl_index, 'Identification_Status'].split(";")

        # next neighbour: adjacent row, or two ahead across a blank line
        nnbl_index = index + 1
        if index == last_index:
            nnbl_pos = ['blank']
        elif df.at[index, 'nlb'] == 'N':
            nnbl_pos = df.at[index + 1, 'Identification_Status'].split(";")
        elif index + 1 == last_index:
            nnbl_pos = ['blank']
        else:
            nnbl_index = index + 2
            nnbl_pos = df.at[nnbl_index, 'Identification_Status'].split(";")

        cur_indent = df.at[index, 'ssc']
        pnbl_indent = df.at[pnbl_index, 'ssc']
        nnbl_indent = df.at[nnbl_index, 'ssc']

        # neighbour before the previous neighbour; may not exist near the top
        try:
            back = 1 if df.at[pnbl_index, 'plb'] == 'N' else 2
            ppnbl_pos = df.at[pnbl_index - back, 'Identification_Status'].split(";")
            ppnbl_exists = True
        except (KeyError, AttributeError):
            ppnbl_pos = []
            ppnbl_exists = False

        line_no = df.at[index, 'line_no']
        data = df.at[index, 'data']
        cur_line_pos = df.at[index, 'Identification_Status'].split(";")
        cur_par = df.at[index, 'parenthetical']

        pnbl_par = pnbl_case = pnbl_data = ''
        try:
            pnbl_par = df.at[pnbl_index, 'parenthetical']
            pnbl_case = df.at[pnbl_index, 'case']
            pnbl_data = df.at[pnbl_index, 'data']
        except KeyError:
            # keep the defaults for whatever could not be read
            pass

        pnbl_top2 = pnbl_pos[1] if len(pnbl_pos) > 1 else ''

        # top1..top5 fall back to top1 when absent; top5 is only taken when
        # there are exactly five possibilities (kept as in the original)
        n_pos = len(cur_line_pos)
        top1 = cur_line_pos[0]
        top2 = cur_line_pos[1] if n_pos >= 2 else top1
        top3 = cur_line_pos[2] if n_pos >= 3 else top1
        top4 = cur_line_pos[3] if n_pos >= 4 else top1
        top5 = cur_line_pos[4] if n_pos == 5 else top1

        ## if top is 1,6,7,16 identify them
        ## identify as 7 where 9 is top and 7 is 2nd and parenthetical absent
        ps6_ok = (top1 == 'ps6' and pnbl_par == 'Absent'
                  and "".join(nnbl_pos) != 'ps6'
                  and nnbl_pos[0] != 'ps5' and nnbl_pos[0] != 'ps6')
        ps16_ok = top1 == 'ps16' and nnbl_pos[0] not in ('ps13', 'ps15', 'ps10')

        if top1 in ('ps1', 'ps7', 'ps9', 'ps8') or ps6_ok or ps16_ok:
            try:
                print(line_no,data)
            except Exception:
                # best-effort debug output
                pass

            if top1 == 'ps6' and n_pos > 1:
                if 'ps15' in (top1, top2, top3, top4, top5):
                    print("not identifying as ps6 can also be ps15 ")
                    continue
                if pnbl_pos[0] in ('ps13', 'ps14'):
                    print("not identifying as ps6 as could be ps15")
                    continue
                if pnbl_case == 'AllUpper' and len(pnbl_data.split()) == 1:
                    print("not identifying as ps6 can also be ps15 ")
                    continue
                if len(data.split()) == 1 and cur_indent > pnbl_indent:
                    print("not identifying as ps6 can also be ps7 ")
                    continue

            print("identifying as top",top1)

            if top1 == 'ps9' and top2 == 'ps7' and cur_par == 'Absent':
                line_new_pos = [top2]
            else:
                line_new_pos = [top1]

            # pnbl_pos / nnbl_pos always hold at least one entry here
            if nnbl_pos[0] == 'ps1' and 'ps2' in cur_line_pos:
                line_new_pos.append('ps2')
                print("added ps2 to ps1")

            if pnbl_pos[0] == 'ps1' and 'ps3' in cur_line_pos:
                line_new_pos.append('ps3')
                print("added ps3 to ps1")

            if top1 == 'ps1' and (top2 == 'ps6' or top3 == 'ps6' or top2 == 'ps8'):
                ## not indentifying as ps1
                continue

            if top1 == 'ps1' and 'ps30' in cur_line_pos:
                line_new_pos.append('ps30')
                print("added ps30 to ps1")

            df.at[index, 'Identification_Status'] = ";".join(line_new_pos)
            df.at[index, 'When_Identified'] = stamp
            continue

        ppnbl_top_not_16 = not (ppnbl_exists and ppnbl_pos[0] == 'ps16')

        # top2 equals top1 for single-possibility rows, so this never indexes
        # past the end of cur_line_pos
        if (top1 == 'ps5' or top2 == 'ps5') and 'ps16' not in pnbl_top2:
            next_blank = df.at[index, 'nlb']
            if ((pnbl_pos[0] == 'ps4' or (pnbl_pos[0] == 'ps5' and ppnbl_top_not_16))
                    and next_blank == 'N'
                    and cur_indent == pnbl_indent and cur_indent == nnbl_indent):
                # strict variant: the ps5 identification is intentionally off
                print("code commented")
            elif pnbl_pos[0] == 'ps4' and next_blank == 'Y' and cur_indent == pnbl_indent:
                print("identifying current as ps6 as next also blank")
                try:
                    print(line_no,data)
                except Exception:
                    pass
                df.at[index, 'Identification_Status'] = 'ps6'
                df.at[index, 'When_Identified'] = stamp

    ## additonally identify the ps8
    extn_list = ['O.S.','V.O.',"CONT'D","CONT’D",'VOICE','CONT.']
    for index in df.index[1:-1]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue
        cur_line_pos = df.at[index, 'Identification_Status'].split(";")
        text = str(df.at[index, 'data'])

        extn_found = any(extn in text for extn in extn_list)

        if cur_line_pos[0] == 'ps8' and extn_found:
            df.at[index, 'Identification_Status'] = 'ps8'
            df.at[index, 'When_Identified'] = stamp

    return df


def start_top_identifications_part1_diluted(df):
    """Lenient variant of the top-possibility identification pass.

    Rows ``df.index[1:-1]`` are visited in order; rows with
    ``isIdentified == 'Yes'`` are skipped.

    * A ``'ps1'`` top with ``'ps6'`` as second/third or ``'ps8'`` as second
      possibility is left untouched.
    * A row whose top is ``'ps1'`` or ``'ps7'`` is reduced to that top, as is
      a ``'ps6'`` / ``'ps16'`` top when the neighbour conditions in the code
      hold (``'ps30'`` is kept alongside ``'ps1'`` when present).  A
      multi-possibility ``'ps6'`` top that also lists ``'ps15'`` is left
      untouched.
    * A row with ``'ps5'`` as first or second possibility becomes ``'ps5'``
      when the previous neighbour's top is ``'ps4'`` (or ``'ps5'`` not
      preceded by ``'ps16'``), the next line is not blank, all three lines
      share the same ``ssc`` and none of them has ``case == 'AllUpper'``;
      it becomes ``'ps6'`` when the previous neighbour's top is ``'ps4'``,
      the next line is blank and the indents match.

    Every write sets ``When_Identified = 'StartIdentifyingTopsDiluted'``.

    Fixes relative to the previous revision: the second possibility is no
    longer indexed on single-possibility rows (IndexError), and a leading
    blank line now yields the ``['blank']`` placeholder instead of an empty
    list.

    Args:
        df: DataFrame with consecutive integer labels and the columns
            ``isIdentified``, ``plb``, ``nlb``, ``ssc``, ``case``,
            ``parenthetical``, ``Identification_Status``,
            ``When_Identified``, ``line_no`` and ``data``.

    Returns:
        The same DataFrame, modified in place.
    """
    stamp = 'StartIdentifyingTopsDiluted'
    last_index = df.index[-1] if len(df.index) else None

    print("in tops diluted")
    for index in df.index[1:-1]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue
        print(index)

        # previous neighbour: adjacent row, or two back across a blank line
        pnbl_index = index - 1
        if index == 0:
            pnbl_pos = ['blank']
        elif df.at[index, 'plb'] == 'N':
            pnbl_pos = df.at[index - 1, 'Identification_Status'].split(";")
        elif index - 1 == 0:
            # blank first row (the original assigned to a misspelt `pnpl_pos`)
            pnbl_pos = ['blank']
        else:
            pnbl_index = index - 2
            pnbl_pos = df.at[pnbl_index, 'Identification_Status'].split(";")

        # next neighbour: adjacent row, or two ahead across a blank line
        nnbl_index = index + 1
        if index == last_index:
            nnbl_pos = ['blank']
        elif df.at[index, 'nlb'] == 'N':
            nnbl_pos = df.at[index + 1, 'Identification_Status'].split(";")
        elif index + 1 == last_index:
            nnbl_pos = ['blank']
        else:
            nnbl_index = index + 2
            nnbl_pos = df.at[nnbl_index, 'Identification_Status'].split(";")

        cur_indent = df.at[index, 'ssc']
        try:
            pnbl_indent = df.at[pnbl_index, 'ssc']
            pnbl_case = df.at[pnbl_index, 'case']
        except KeyError:
            pnbl_indent = -1
            pnbl_case = ''
        try:
            nnbl_indent = df.at[nnbl_index, 'ssc']
            nnbl_case = df.at[nnbl_index, 'case']
        except KeyError:
            nnbl_indent = -1
            nnbl_case = ''

        # neighbour before the previous neighbour; may not exist near the top
        try:
            back = 1 if df.at[pnbl_index, 'plb'] == 'N' else 2
            ppnbl_pos = df.at[pnbl_index - back, 'Identification_Status'].split(";")
            ppnbl_exists = True
        except (KeyError, AttributeError):
            ppnbl_pos = []
            ppnbl_exists = False

        line_no = df.at[index, 'line_no']
        data = df.at[index, 'data']
        cur_line_pos = df.at[index, 'Identification_Status'].split(";")
        cur_line_case = df.at[index, 'case']
        pnbl_par = df.at[pnbl_index, 'parenthetical']

        pnbl_top2 = pnbl_pos[1] if len(pnbl_pos) > 1 else ''

        # top1..top5 fall back to top1 when absent; top5 is only taken when
        # there are exactly five possibilities (kept as in the original)
        n_pos = len(cur_line_pos)
        top1 = cur_line_pos[0]
        top2 = cur_line_pos[1] if n_pos >= 2 else top1
        top3 = cur_line_pos[2] if n_pos >= 3 else top1
        top4 = cur_line_pos[3] if n_pos >= 4 else top1
        top5 = cur_line_pos[4] if n_pos == 5 else top1

        if top1 == 'ps1' and (top2 == 'ps6' or top3 == 'ps6' or top2 == 'ps8'):
            ## not indentifying as ps1
            continue

        ## if top is 1,6,7,16 identify them
        ps6_ok = (top1 == 'ps6' and pnbl_par == 'Absent'
                  and "".join(nnbl_pos) != 'ps6' and nnbl_pos[0] != 'ps5')
        ps16_ok = top1 == 'ps16' and nnbl_pos[0] != 'ps15'

        if top1 in ('ps1', 'ps7') or ps6_ok or ps16_ok:
            try:
                print(line_no,data)
            except Exception:
                # best-effort debug output
                pass
            if n_pos > 1 and top1 == 'ps6' and 'ps15' in (top1, top2, top3, top4, top5):
                print("not identifying as ps6 can also be ps15 ")
                continue
            print("identifying as top",top1)

            line_new_pos = [top1]
            if top1 == 'ps1' and 'ps30' in cur_line_pos:
                line_new_pos.append('ps30')
                print("added ps30 to ps1")

            df.at[index, 'Identification_Status'] = ";".join(line_new_pos)
            df.at[index, 'When_Identified'] = stamp
            continue

        ppnbl_top_not_16 = not (ppnbl_exists and ppnbl_pos[0] == 'ps16')

        # top2 equals top1 for single-possibility rows, so this never indexes
        # past the end of cur_line_pos
        if (top1 == 'ps5' or top2 == 'ps5') and 'ps16' not in pnbl_top2:
            next_blank = df.at[index, 'nlb']
            same_indent_all = cur_indent == pnbl_indent and cur_indent == nnbl_indent
            none_all_upper = (pnbl_case != 'AllUpper'
                              and cur_line_case != 'AllUpper'
                              and nnbl_case != 'AllUpper')

            if ((pnbl_pos[0] == 'ps4' or (pnbl_pos[0] == 'ps5' and ppnbl_top_not_16))
                    and next_blank == 'N' and same_indent_all and none_all_upper):
                print("Lenient: code not commented")

                print("identifying current as ps5")
                try:
                    print(line_no,data)
                except Exception:
                    pass
                df.at[index, 'Identification_Status'] = 'ps5'
                df.at[index, 'When_Identified'] = stamp
            elif pnbl_pos[0] == 'ps4' and next_blank == 'Y' and cur_indent == pnbl_indent:
                print("identifying current as ps6 as next also blank")
                try:
                    print(line_no,data)
                except Exception:
                    pass

                df.at[index, 'Identification_Status'] = 'ps6'
                df.at[index, 'When_Identified'] = stamp

    return df

def examine_speaker_mix_part1(df,audit_df):
    """Split rows whose upper-case lead-in is fused with a trailing parenthetical.

    A row is split when all of the following hold:

    * it is not marked identified and has a non-null ``Identification_Status``;
    * 'ps30' is among its ';'-separated status candidates;
    * its first candidate is not ps1/ps2/ps14/ps5/ps13/ps4 (skipped because
      the line could be a slug line);
    * its ``parenthetical`` flag is 'PartMidEnd';
    * its text contains none of the extension markers O.S., V.O., CONT'D, VOICE;
    * the text before the first '(' is upper case.

    The text before '(' stays on the row. The text from '(' onwards moves to a
    new row inserted directly below it, with status 'ps10', parenthetical
    'Complete' and a line number between the two neighbouring line numbers.

    Args:
        df: line table with the columns isIdentified, Identification_Status,
            line_no, data, parenthetical, When_Identified, nlb, plb and case.
            The caller's frame is not modified; a re-indexed copy is edited.
        audit_df: accepted for interface compatibility; it is neither read
            nor modified (see the NOTE in the body).

    Returns:
        The edited copy of ``df`` with a fresh 0..n-1 index.
    """
    df = df.sort_index().reset_index(drop=True)

    # NOTE(review): the previous implementation rebound ``audit_df`` to a copy
    # of ``df`` and wrote its audit flags ('line_removed',
    # 'line_broken_into_multiple_lines') to that throwaway copy, so the
    # caller's audit frame was never read or changed. That observable
    # behaviour is preserved here: only the copy's row labels mattered, for
    # the collision check on the new line number. Confirm whether the real
    # audit frame was meant to be updated instead.
    taken_line_labels = set(range(len(df)))

    slug_like_first = ('ps1', 'ps2', 'ps14', 'ps5', 'ps13', 'ps4')
    extn_list = ('O.S.', 'V.O.', "CONT'D", "CONT’D", 'VOICE')

    # Walk by position instead of iterating a snapshot of df.index: rows are
    # inserted while looping, and a snapshot would stop before the last rows.
    index = -1
    while index + 1 < len(df):
        index += 1

        status = df.at[index, 'Identification_Status']
        if df.at[index, 'isIdentified'] == 'Yes' or pd.isna(status):
            continue

        cur_line_pos = status.split(";")

        ## if parenthetical at last then split to new line
        if 'ps30' not in cur_line_pos:
            continue
        if cur_line_pos[0] in slug_like_first:
            # skipping as could be slugline
            continue

        data = df.at[index, 'data']
        if df.at[index, 'parenthetical'] != 'PartMidEnd':
            continue
        if any(extn in str(data) for extn in extn_list):
            continue

        try:
            print(data)
        except Exception:
            # best-effort debug output only
            pass

        paren = re.search(r'\(', data)
        if not paren:
            continue

        before_par = data[:paren.start()]
        after_par = data[paren.start():]
        print("before_par = data[:pos_starts]  line 5557:", before_par)
        print("after_par = data[pos_starts:] line 5558 :", after_par)
        print ("Seperating Parenthetical")
        print("Identifying as speaker mix with dialogue and current pos")
        print(cur_line_pos)
        print(index)
        if not before_par.isupper():
            # skip as possibly not speaker
            continue

        # Single-step .at writes: chained indexing (df[col][i] = v) is not
        # guaranteed to reach the frame.
        df.at[index, 'data'] = before_par
        df.at[index, 'parenthetical'] = 'Absent'
        df.at[index, 'When_Identified'] = 'ExaminingSpeakerMix'
        df.at[index, 'Identification_Status'] = ";".join(cur_line_pos)
        nlb = df.at[index, 'nlb']
        df.at[index, 'nlb'] = 'N'

        line_no = df.at[index, 'line_no']
        if index + 1 < len(df):
            next_line_no = float(df.at[index + 1, 'line_no'])
        else:
            # Last row: no following line exists, so place the new line half
            # a step after the current one.
            next_line_no = float(line_no) + 1
        new_line_no = (float(line_no) + next_line_no) / 2
        if new_line_no in taken_line_labels:
            new_line_no = (new_line_no + next_line_no) / 2
        taken_line_labels.add(new_line_no)

        print(
            "index:",index,"\n",
            "df['data'][index]:",df.at[index, 'data'],"\n",
            "df['parenthetical'][index]:",df.at[index, 'parenthetical'],"\n",
            "df['When_Identified'][index]:",df.at[index, 'When_Identified'],"\n",
            "df['Identification_Status'][index]:",df.at[index, 'Identification_Status'],"\n",
            "df['nlb'][index]:",df.at[index, 'nlb'],"\n",
        )
        print(df.at[index, 'Identification_Status'])
        try:
            print(after_par)
        except Exception:
            pass
        print("identifying parenthetical")

        # A fractional label sorts the new row right after the current one;
        # the index is then rebuilt as 0..n-1.
        new_label = index + 0.25
        df.loc[new_label] = np.nan
        df.loc[new_label, 'data'] = after_par
        df.loc[new_label, 'parenthetical'] = 'Complete'
        df.loc[new_label, 'When_Identified'] = 'ExaminingSpeakerMix'
        df.loc[new_label, 'Identification_Status'] = 'ps10'
        df.loc[new_label, 'case'] = ''
        df.loc[new_label, 'plb'] = 'N'
        df.loc[new_label, 'nlb'] = nlb
        df.loc[new_label, 'line_no'] = new_line_no

        df = df.sort_index().reset_index(drop=True)

    return df
#     df.to_csv(p.output_file_path,index=False)

#     lines_not_removed = audit_df.loc[audit_df['line_removed'] != 'Yes'].index.to_list()
#     audit_df.sort_index(inplace= True)
#     audit_df.reset_index(inplace= True)

#     for line in lines_not_removed:
#         new_data = ''
#         try:
#             new_data =df.loc[df['line_no'] == line, 'data'].values[0]
#         except:
#             pass
#         #print(new_data)
#         audit_df.loc[audit_df['line_no'] == line, 'data_corrected'] = new_data
#         #print(audit_df.loc[audit_df['line_no'] == line, 'data_corrected'])


#     audit_df.to_csv(p.audit_report_path, index = False)

def _speaker_mix_part2_line_numbers(df, index, audit_df):
    """Return ``(line_no, new_line_no)`` for a row to be inserted after ``index``.

    ``new_line_no`` is the midpoint between this row's line number and the
    next row's. If that value is already a label of ``audit_df`` it is moved
    halfway towards the next line number.
    """
    line_no = df.at[index, 'line_no']
    if index + 1 < len(df):
        next_line_no = float(df.at[index + 1, 'line_no'])
    else:
        # Last row: no following line exists, so place the new line half a
        # step after the current one instead of failing on a missing row.
        next_line_no = float(line_no) + 1
    new_line_no = (float(line_no) + next_line_no) / 2
    if new_line_no in audit_df.index:
        new_line_no = (new_line_no + next_line_no) / 2
    return line_no, new_line_no


def _speaker_mix_part2_insert_row(df, index, values):
    """Insert a row right after ``index`` and return the re-indexed frame.

    The new row is first filled with '' in every column, then the given
    column values are applied in order.
    """
    new_label = index + 0.25  # fractional label sorts between index and index+1
    df.loc[new_label] = ''
    for column, value in values.items():
        df.loc[new_label, column] = value
    return df.sort_index().reset_index(drop=True)


def examine_speaker_mix_part2(df,audit_df):
    """Split 'ps30' rows that hold a speaker and dialogue on the same line.

    Only rows whose first status candidate contains 'ps30' are examined (and,
    when a second candidate exists, it must contain 'ps30' too). Two shapes
    are handled:

    * ``SPEAKER: dialogue`` - the row has a colon and no extension marker
      (O.S., V.O., CONT'D, VOICE). Text before the colon stays on the row
      (status 'ps7'); text after it goes to a new row with status 'ps15'.
    * ``SPEAKER dialogue`` - the first status is exactly 'ps30'. The leading
      run of upper-case words stays on the row (status 'ps7', case
      'AllUpper'); the remaining words go to a new row with status
      'ps15;ps13'.

    Args:
        df: line table with the columns Identification_Status, line_no, data,
            parenthetical, When_Identified, case, plb and nlb. The caller's
            frame is left untouched; a re-indexed copy is edited, so callers
            must use the return value.
        audit_df: audit table whose index is checked for line-number
            collisions. It is modified in place: a row is added for every
            new line number ('line_removed' = 'No') and the split line gets a
            'line_broken_into_multiple_lines' note.

    Returns:
        The edited copy of ``df`` with a fresh 0..n-1 index.
    """
    ## examine the ps30s and split with colon and all caps speaker
    print("Start speaker mix part2")
    df = df.sort_index().reset_index(drop=True)
    extn_list = ['O.S.','V.O.',"CONT'D","CONT’D",'VOICE']

    # Walk by position instead of iterating a snapshot of df.index: rows are
    # inserted while looping, and a snapshot would stop before the last rows.
    index = -1
    while index + 1 < len(df):
        index += 1

        status = df.at[index, 'Identification_Status']
        if pd.isna(status):
            # nothing to examine on a row without candidates
            continue
        data = df.at[index, 'data']
        cur_line_pos = status.split(";")

        if 'ps30' not in cur_line_pos[0]:
            continue
        if len(cur_line_pos) > 1 and 'ps30' not in cur_line_pos[1]:
            continue

        ## search colon and spearate after colon
        try:
            print("data:\n",data)
        except Exception:
            # best-effort debug output only
            pass
        print(extn_list)
        extn_found = any(extn in str(data) for extn in extn_list)

        if ':' in data and not extn_found:
            before_colon, _, after_colon = data.partition(':')

            # A blank or whitespace-only prefix is not a speaker name.
            if not before_colon.strip():
                print ("nothing before colon")
                continue
            print ("Seperating speaker dialogue separated by colon")
            print(index)
            try:
                print(before_colon)
            except Exception:
                pass

            df.at[index, 'data'] = before_colon
            df.at[index, 'parenthetical'] = 'Absent'
            df.at[index, 'When_Identified'] = 'ExaminingSpeakerMixDialogue'
            df.at[index, 'Identification_Status'] = 'ps7'
            nlb = df.at[index, 'nlb']
            df.at[index, 'nlb'] = 'N'

            line_no, new_line_no = _speaker_mix_part2_line_numbers(df, index, audit_df)
            audit_df.loc[new_line_no] = ''
            audit_df.loc[new_line_no, 'line_removed'] = 'No'
            audit_df.loc[line_no, 'line_broken_into_multiple_lines'] = 'Separated Speaker and Dialogue mixed with colon:'

            try:
                print(after_colon)
            except Exception:
                pass
            print("identifying after colon as dialogue end")
            df = _speaker_mix_part2_insert_row(df, index, {
                'data': after_colon,
                'parenthetical': 'Absent',
                'When_Identified': 'ExaminingSpeakerMixDialogue',
                'Identification_Status': 'ps15',
                'case': '',
                'plb': 'N',
                'nlb': nlb,
                'line_no': new_line_no,
            })
            continue

        elif cur_line_pos[0] == 'ps30':
            words = data.lstrip().split(" ")
            k = 0
            for word in words:
                try:
                    print(word)
                except Exception:
                    pass
                if word.isupper():
                    k += 1
                else:
                    break
            print(k)

            # Leading upper-case words form the speaker; the remainder is the
            # dialogue, re-joined with the spaces it was split on.
            speaker = " ".join(words[:k]).strip()
            dialogue = " ".join(words[k:]).strip()

            print ("Seperating speaker dialogue for ps30")
            print(index)
            try:
                print(speaker)
            except Exception:
                pass
            if not speaker or not dialogue:
                print("unable to separate speaker from line, speaker possibly blank or line is not speaker dialogue mix",index)
                continue

            df.at[index, 'data'] = speaker
            print("df['data'][index]:",df.at[index, 'data'])

            df.at[index, 'parenthetical'] = 'Absent'
            print("df['parenthetical'][index]:",df.at[index, 'parenthetical'])

            df.at[index, 'When_Identified'] = 'ExaminingSpeakerMixDialogue'
            print("df['When_Identified'][index]:",df.at[index, 'When_Identified'])

            df.at[index, 'case'] = 'AllUpper'
            print("df['case'][index]:",df.at[index, 'case'])

            df.at[index, 'Identification_Status'] = 'ps7'
            print("df['Identification_Status'][index]:",df.at[index, 'Identification_Status'])

            nlb = df.at[index, 'nlb']
            print("nlb",nlb)

            df.at[index, 'nlb'] = 'N'
            print("df['nlb'][index]:",df.at[index, 'nlb'])

            line_no, new_line_no = _speaker_mix_part2_line_numbers(df, index, audit_df)
            print("line_no", line_no)
            print("new_line_no:", new_line_no)

            try:
                print("try")
                audit_df.loc[new_line_no, 'line_removed'] = 'No'
            except Exception:
                # fall back to creating the whole audit row first
                print("except")
                audit_df.loc[new_line_no] = ''
                audit_df.loc[new_line_no, 'line_removed'] = 'No'
            print("audit_df.loc[new_line_no]['line_removed']:",audit_df.loc[new_line_no, 'line_removed'])

            try:
                print("try")
                audit_df.loc[line_no ,'line_broken_into_multiple_lines'] = 'Separated Speaker and Dialogue '
            except Exception:
                print("except")
                audit_df.loc[line_no] = ''
                audit_df.loc[line_no ,'line_broken_into_multiple_lines'] = 'Separated Speaker and Dialogue '
            print("audit_df.loc[line_no]['line_broken_into_multiple_lines']:",audit_df.loc[line_no, 'line_broken_into_multiple_lines'])

            print("identifying dialogue from ps30 as ps13;ps15")
            try:
                print(dialogue)
            except Exception:
                pass
            df = _speaker_mix_part2_insert_row(df, index, {
                'data': dialogue,
                'parenthetical': 'Absent',
                'When_Identified': 'ExaminingSpeakerMixDialogue',
                'Identification_Status': 'ps15;ps13',
                'case': '',
                'plb': 'N',
                'nlb': nlb,
                'line_no': new_line_no,
            })
            continue

    return df
#     df.to_csv(p.output_file_path, index = False)


#     lines_not_removed = audit_df.loc[audit_df['line_removed'] != 'Yes'].index.to_list()
#     audit_df.sort_index(inplace= True)
#     audit_df.reset_index(inplace= True)

#     for line in lines_not_removed:
#         new_data = ''
#         try:
#             new_data =df.loc[df['line_no'] == line, 'data'].values[0]
#         except:
#             pass
#         #print(new_data)
#         audit_df.loc[audit_df['line_no'] == line, 'data_corrected'] = new_data
#         #print(audit_df.loc[audit_df['line_no'] == line, 'data_corrected'])


#     audit_df.to_csv(p.audit_report_path, index = False)


def _top_part2_other_indent_candidates(df, start, stop, step, indent):
    """Return the status candidates of the nearest row with a different indent.

    Rows are scanned from ``start`` in direction ``step`` up to, but not
    including, ``stop``. A row qualifies when its status is not 'blank' and
    its 'ssc' value differs from ``indent``. Returns ``[]`` when the scan
    reaches ``stop`` without a match.
    """
    position = start
    while position != stop:
        row_status = df.at[position, 'Identification_Status']
        if row_status != 'blank' and df.at[position, 'ssc'] != indent:
            return row_status.split(";")
        position += step
    return []


def start_top_identifications_part2(df):
    """Narrow the status candidates of unidentified rows using their neighbours.

    For every row that is not marked identified and has a non-null
    ``Identification_Status`` the following rules run in order:

    1. If the row carries both ps6 and ps15, look up the nearest rows above
       and below whose indent ('ssc') differs. Drop ps15 when the upper one
       starts with ps15/ps16 and the lower one with ps7/ps10; drop ps6 when
       the upper one starts with ps7/ps10 and the lower one with
       ps1/ps4/ps6/ps16.
    2. If the directly following line is solely ps6, drop ps6 from this row.
    3. If the next non-blank line starts with ps1 or ps7 (or with ps4 while
       this row's first two candidates are not ps1), and the previous
       non-blank line has ps5 as first or second candidate at the same
       indent, the row becomes ps6.
    4. If the row's first two candidates are ps5/ps6, a blank line follows and
       the next non-blank line is solely ps6: the row becomes ps6 when the
       previous line is not blank and has the same indent; when the previous
       line is blank, ps5 is dropped.

    Args:
        df: line table indexed 0..n-1 with the columns isIdentified,
            Identification_Status, plb, nlb, ssc, lcp, line_no, data and
            When_Identified. It is modified in place.

    Returns:
        The same ``df`` object.
    """
    if len(df.index) == 0:
        return df
    last_index = df.index[-1]

    for index in df.index:

        status = df.at[index, 'Identification_Status']
        if df.at[index, 'isIdentified'] == 'Yes' or pd.isna(status):
            continue

        plb = df.at[index, 'plb']
        nlb = df.at[index, 'nlb']

        # Previous non-blank line: the line above, or the one before it when
        # the line above is blank. pnbl_index stays None when there is none.
        pnbl_index = None
        if index == 0:
            pnbl_pos = ['blank']
        elif plb == 'N':
            pnbl_index = index - 1
            pnbl_pos = df.at[pnbl_index, 'Identification_Status'].split(";")
        elif index - 1 == 0:
            pnbl_pos = ['blank']
        else:
            pnbl_index = index - 2
            pnbl_pos = df.at[pnbl_index, 'Identification_Status'].split(";")

        # Next non-blank line, mirrored.
        if index == last_index:
            nnbl_pos = ['blank']
        elif nlb == 'N':
            nnbl_pos = df.at[index + 1, 'Identification_Status'].split(";")
        elif index + 1 == last_index:
            nnbl_pos = ['blank']
        else:
            nnbl_pos = df.at[index + 2, 'Identification_Status'].split(";")

        cur_indent = df.at[index, 'ssc']
        pnbl_indent = df.at[pnbl_index, 'ssc'] if pnbl_index is not None else -1
        pnbl_top2 = pnbl_pos[1] if len(pnbl_pos) > 1 else ''

        line_no = df.at[index, 'line_no']
        data = df.at[index, 'data']

        # Snapshot of the candidates before any rule edits them; positions
        # that do not exist fall back to the first candidate.
        cur_line_pos = status.split(";")
        top1 = cur_line_pos[0]
        top2 = cur_line_pos[1] if len(cur_line_pos) >= 2 else top1
        top3 = cur_line_pos[2] if len(cur_line_pos) >= 3 else top1
        top4 = cur_line_pos[3] if len(cur_line_pos) >= 4 else top1
        top5 = cur_line_pos[4] if len(cur_line_pos) == 5 else top1

        ## if cur line contains both 15 and 6
        if 'ps6' in cur_line_pos and 'ps15' in cur_line_pos:
            # The neighbour scans are only needed for this rule, so they run
            # here rather than for every row.
            pdil_pos = _top_part2_other_indent_candidates(df, index, 0, -1, cur_indent)
            ndil_pos = _top_part2_other_indent_candidates(df, index, last_index, 1, cur_indent)
            prev_flag = False if pdil_pos else 'start'
            next_flag = False if ndil_pos else 'end'

            print("CURRENT CONATINS 15 6")
            try:
                print(data)
            except Exception:
                # best-effort debug output only
                pass
            print("check pdil , ndil possibilties")
            print(pdil_pos)
            print(cur_line_pos)
            print(ndil_pos)
            print(prev_flag)
            print(next_flag)

            if pdil_pos and ndil_pos:
                if pdil_pos[0] in ('ps15', 'ps16'):
                    if ndil_pos[0] in ('ps7', 'ps10'):
                        print("remove ps15")
                        cur_line_pos.remove('ps15')
                        print(cur_line_pos)
                        df.at[index, 'Identification_Status'] = ";".join(cur_line_pos)
                elif pdil_pos[0] in ('ps7', 'ps10'):
                    if ndil_pos[0] in ('ps1', 'ps4', 'ps6', 'ps16'):
                        print("remove ps6")
                        cur_line_pos.remove('ps6')
                        df.at[index, 'Identification_Status'] = ";".join(cur_line_pos)

            print("\n")

            lcp = df.at[index, 'lcp']

            # Diagnostic only: nothing below changes the frame.
            if 'ps1' in (top1, top2, top3, top4, top5) and lcp < 60:
                print(pnbl_pos)
                print(nnbl_pos)
                print("pssible slug",data)
                print(top1,top2,top3,top4,top5)
                if "".join(pnbl_pos) in ('ps6', 'ps15', 'ps16', 'ps17'):
                    if "".join(nnbl_pos) == 'ps4':
                        print("line is ps1")

        if "".join(nnbl_pos) == 'ps6' and nlb == 'N':
            cur_line_pos = df.at[index, 'Identification_Status'].split(";")
            line_new_pos = [ps for ps in cur_line_pos if ps != 'ps6']
            print(line_new_pos)
            df.at[index, 'Identification_Status'] = ";".join(line_new_pos)
            print("\n")

        if (nnbl_pos[0] == 'ps4' and top1 != 'ps1' and top2 != 'ps1') or nnbl_pos[0] == 'ps1' or nnbl_pos[0] == 'ps7':
            print(pnbl_pos)
            if pnbl_index is not None:
                if pnbl_pos[0] == 'ps5' or pnbl_top2 == 'ps5':
                    if cur_indent == pnbl_indent:
                        try:
                            print(line_no,data,"identifying as PS6")
                        except Exception:
                            pass
                        df.at[index, 'Identification_Status'] = 'ps6'
                        df.at[index, 'When_Identified'] = 'StartTopIdentificationPart2'
                        continue

        # Re-read: the rules above may have rewritten the candidates.
        cur_line_pos = df.at[index, 'Identification_Status'].split(";")
        if len(cur_line_pos) == 1:
            continue

        if cur_line_pos[0] in ('ps5','ps6') and cur_line_pos[1] in ('ps5','ps6'):
            if nlb == 'Y' and "".join(nnbl_pos) == 'ps6':
                if plb == 'N':
                    if cur_indent == pnbl_indent:
                        try:
                            print(line_no,data,"identifying as ps6")
                        except Exception:
                            pass
                        df.at[index, 'Identification_Status'] = 'ps6'
                        continue
                else:
                    # remove ps5
                    line_new_pos = [ps for ps in cur_line_pos if ps != 'ps5']
                    try:
                        print(line_no,data,"removed ps5")
                    except Exception:
                        pass
                    df.at[index, 'Identification_Status'] = ";".join(line_new_pos)
                    continue

    return df


def start_slug_identification(df):
    """Resolve rows that are candidates for both ps1 and ps18 in favour of ps1.

    Every row except the first and last is examined unless it is already
    marked identified or has no status. A row becomes 'ps1' when:

    * the previous non-blank line is solely ps16,
    * the next non-blank line does not list ps1,
    * the row lists both ps1 and ps18, and
    * its 'ps1' weight column is greater than its 'ps18' weight column.

    (The log messages in this file suggest ps1 denotes a slug line - confirm.)

    Args:
        df: line table indexed 0..n-1 with the columns isIdentified,
            Identification_Status, plb, nlb, line_no, data, ps1, ps18 and
            When_Identified. It is modified in place.

    Returns:
        The same ``df`` object.
    """
    if len(df.index) == 0:
        return df
    last_index = df.index[-1]

    # loop through to examine slug
    for index in df.index[1:-1]:
        if df.at[index, 'isIdentified'] == 'Yes':
            continue

        status = df.at[index, 'Identification_Status']
        if pd.isna(status):
            # a row without candidates cannot be resolved; the sibling
            # identification passes skip such rows the same way
            continue

        # Previous non-blank line: the line above, or the one before it when
        # the line above is blank.
        if index == 0:
            pnbl_pos = ['blank']
        elif df.at[index, 'plb'] == 'N':
            pnbl_pos = df.at[index - 1, 'Identification_Status'].split(";")
        elif index - 1 == 0:
            pnbl_pos = ['blank']
        else:
            pnbl_pos = df.at[index - 2, 'Identification_Status'].split(";")

        # Next non-blank line, mirrored.
        if index == last_index:
            nnbl_pos = ['blank']
        elif df.at[index, 'nlb'] == 'N':
            nnbl_pos = df.at[index + 1, 'Identification_Status'].split(";")
        elif index + 1 == last_index:
            nnbl_pos = ['blank']
        else:
            nnbl_pos = df.at[index + 2, 'Identification_Status'].split(";")

        if "".join(pnbl_pos) != 'ps16' or 'ps1' in nnbl_pos:
            continue
        print(nnbl_pos)

        cur_line_pos = status.split(";")
        if 'ps1' not in cur_line_pos or 'ps18' not in cur_line_pos:
            continue

        wt1 = int(df.at[index, 'ps1'])
        wt18 = int(df.at[index, 'ps18'])
        if wt1 > wt18:
            print("identifying current as ps1 ")
            try:
                print(df.at[index, 'line_no'],df.at[index, 'data'])
            except Exception:
                # best-effort debug output only
                pass
            df.at[index, 'Identification_Status'] = 'ps1'
            df.at[index, 'When_Identified'] = 'StartIdentifyingSlug'

    return df


def start_top_identifications_part3(df):
    """Confirm ps1 for rows that sit between a ps16 line and a ps4 line.

    For every row that is not marked identified and has a non-null
    ``Identification_Status``: when the row's first candidate is ps1, the
    previous non-blank line lists ps16 among its top candidates and the next
    non-blank line lists ps4 among its first three candidates, the row's
    status is set to 'ps1'. (The log message describes this as a line
    "between top transition and action".)

    Args:
        df: line table indexed 0..n-1 with the columns isIdentified,
            Identification_Status, plb, nlb, line_no, data and
            When_Identified. It is modified in place.

    Returns:
        The same ``df`` object.
    """
    if len(df.index) == 0:
        return df
    last_index = df.index[-1]

    for index in df.index:

        status = df.at[index, 'Identification_Status']
        if df.at[index, 'isIdentified'] == 'Yes' or pd.isna(status):
            continue

        # Previous non-blank line: the line above, or the one before it when
        # the line above is blank.
        if index == 0:
            pnbl_pos = ['blank']
        elif df.at[index, 'plb'] == 'N':
            pnbl_pos = df.at[index - 1, 'Identification_Status'].split(";")
        elif index - 1 == 0:
            pnbl_pos = ['blank']
        else:
            pnbl_pos = df.at[index - 2, 'Identification_Status'].split(";")

        # Next non-blank line, mirrored.
        if index == last_index:
            nnbl_pos = ['blank']
        elif df.at[index, 'nlb'] == 'N':
            nnbl_pos = df.at[index + 1, 'Identification_Status'].split(";")
        elif index + 1 == last_index:
            nnbl_pos = ['blank']
        else:
            nnbl_pos = df.at[index + 2, 'Identification_Status'].split(";")

        line_no = df.at[index, 'line_no']
        data = df.at[index, 'data']

        print("\n")

        # Top candidates of the previous line: the first four, plus the fifth
        # only when the list has exactly five entries.
        pnbl_tops = pnbl_pos[:4]
        if len(pnbl_pos) == 5:
            pnbl_tops.append(pnbl_pos[4])
        if len(pnbl_pos) >= 4:
            print(pnbl_pos[3])
        pnbl_top4 = pnbl_pos[3] if len(pnbl_pos) >= 4 else pnbl_pos[0]

        if status.split(";")[0] != 'ps1':
            continue

        try:
            print("possible slug",data)
        except Exception:
            # best-effort debug output only
            pass
        print(pnbl_pos)
        print(pnbl_top4)
        print(nnbl_pos)
        if 'ps16' in pnbl_tops and 'ps4' in nnbl_pos[:3]:
            print("identifying current as ps1 as between top transitiona and action")
            try:
                print(line_no,data)
            except Exception:
                pass
            df.at[index, 'Identification_Status'] = 'ps1'
            df.at[index, 'When_Identified'] = 'StartIdentifyingTopsPart3'

    return df


def start_top_identifications_part4(df):
    """Settle ambiguous rows with two neighbour rules, then settle the last row.

    ``Identification_Status`` holds ``;``-separated candidate codes (``ps1``,
    ``ps4`` ...), first candidate first.  Every row that is not flagged
    ``isIdentified == 'Yes'`` and has a non-null status is compared with its
    closest non-blank neighbour above and below.  At most one blank row is
    skipped in each direction, as signalled by ``plb`` / ``nlb`` (``'N'``
    means the adjacent row is used directly).  A missing neighbour is
    represented by the pseudo status ``blank`` and indent ``-1``.

    Rules, first match wins:
      * first candidate ``ps4``, previous exactly ``ps15``, next exactly
        ``ps6``, ``case`` not ``AllUpper``, same ``ssc`` as the next row and
        ``nlb == 'N'``  ->  ``ps4``.
      * ``ps16`` among the first three candidates, previous exactly ``ps15``
        or ``ps6``, next exactly ``ps1``, ``plb == 'Y'`` and ``nlb == 'Y'``,
        first candidate not ``ps6``  ->  ``ps16``.

    Afterwards the last row (or the row before it when the last row is
    ``blank``) is collapsed to its first candidate if that candidate is
    ``ps6`` or ``ps15``.  Finally ``isIdentified`` is refreshed for every row
    with a string status: ``'Yes'`` for a single candidate, ``'No'`` otherwise.

    Args:
        df: DataFrame with integer row labels (the code treats label ``0`` as
            the first row) and the columns ``isIdentified``,
            ``Identification_Status``, ``When_Identified``, ``plb``, ``nlb``,
            ``ssc``, ``case``, ``line_no`` and ``data``.

    Returns:
        The same DataFrame, updated in place.
    """
    if len(df.index) == 0:
        # Nothing to classify; also keeps df.index[-1] below from raising.
        return df

    last_label = df.index[-1]

    def closest_non_blank(label, blank_flag, step, edge):
        """Return the label of the nearest non-blank row, or None at the edge."""
        if label == edge:
            return None
        if blank_flag == 'N':
            return label + step
        if label + step == edge:
            return None
        return label + 2 * step

    def candidates(label):
        """Candidate list of a row; ``['blank']`` stands in for a missing row."""
        if label is None:
            return ['blank']
        return df.at[label, 'Identification_Status'].split(";")

    def indent(label):
        """``ssc`` of a row; -1 when there is no such neighbour."""
        return -1 if label is None else df.at[label, 'ssc']

    def show_line(label):
        """Trace the row being decided; printing is best-effort only."""
        line_no = df.at[label, 'line_no']
        data = df.at[label, 'data']
        try:
            print(line_no, data)
        except (UnicodeError, OSError):
            # The console may be unable to encode the script text.
            pass

    def settle(label, code, stage, message):
        """Record the final code for a row together with the deciding stage."""
        print(message)
        show_line(label)
        # .at writes straight into the frame; chained df[col][i] = v may write
        # to a temporary copy depending on the pandas version/configuration.
        df.at[label, 'Identification_Status'] = code
        df.at[label, 'When_Identified'] = stage

    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if df.at[index, 'isIdentified'] == 'Yes' or pd.isna(status):
            continue

        plb = df.at[index, 'plb']
        nlb = df.at[index, 'nlb']
        next_label = closest_non_blank(index, nlb, 1, last_label)
        prev_status = ";".join(candidates(closest_non_blank(index, plb, -1, 0)))
        next_status = ";".join(candidates(next_label))

        cur_line_pos = status.split(";")
        top1 = cur_line_pos[0]

        ## between 15 and 6 , top 4, nlb=N
        if (top1 == 'ps4' and prev_status == 'ps15' and next_status == 'ps6'
                and df.at[index, 'case'] != 'AllUpper'
                and df.at[index, 'ssc'] == indent(next_label)
                and nlb == 'N'):
            settle(index, 'ps4', 'StartIdentifyingTopsPart4',
                   "identifying current as ps4 as between dialogue and action end and top action begin")
            continue

        ## between 15,6 and 1 , top 3 has 16, nlb=Y , plb =Y
        if ('ps16' in cur_line_pos[:3]
                and prev_status in ('ps15', 'ps6')
                and next_status == 'ps1'
                and plb == 'Y' and nlb == 'Y'
                and top1 != 'ps6'):
            settle(index, 'ps16', 'StartIdentifyingTopsPart4',
                   "identifying current as transition ")
            continue

    # Settle the last meaningful row: step over a trailing blank row when a
    # row exists before it.
    last_line_index = last_label
    if (df.at[last_line_index, 'Identification_Status'] == 'blank'
            and (last_line_index - 1) in df.index):
        last_line_index -= 1

    last_status = df.at[last_line_index, 'Identification_Status']
    if isinstance(last_status, str):
        last_pos = last_status.split(";")
        if len(last_pos) > 1 and last_pos[0] in ('ps6', 'ps15'):
            print("Identifying last line as top", last_pos[0])
            show_line(last_line_index)
            df.at[last_line_index, 'Identification_Status'] = last_pos[0]
            df.at[last_line_index, 'When_Identified'] = 'IdentifyingLastLine'

    # Refresh the flag (the previous code compared with == and changed nothing).
    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if not isinstance(status, str):
            # Null statuses are skipped, as in the main loop.
            continue
        df.at[index, 'isIdentified'] = 'Yes' if len(status.split(";")) == 1 else 'No'

    return df


def start_top_identifications_part5(df):
    """Settle or prune candidates of ambiguous rows using neighbour rules.

    ``Identification_Status`` holds ``;``-separated candidate codes, first
    candidate first.  Every row that is not flagged ``isIdentified == 'Yes'``
    and has a non-null status is compared with its closest non-blank neighbour
    above and below (at most one blank row is skipped, as signalled by
    ``plb`` / ``nlb``; a missing neighbour counts as ``blank`` with indent
    ``-1``).

    Settling rules, first match wins and ends processing of the row:
      * previous is ``ps5`` or ``ps4``, first candidate ``ps5``, same ``ssc``
        as the NEXT row, and either ``ps6`` is among the next row's first two
        candidates (its first not being ``ps1``/``ps2``) or the next row's
        first candidate is ``ps5``  ->  ``ps5``.
      * previous is ``ps16``, ``ps1`` among the first two candidates, and
        ``ps4`` or ``ps6`` among the next row's first two candidates
        ->  ``ps1``.

    Pruning rules, applied in order when nothing was settled:
      * if the next row has no ``ps4`` in its first four and no ``ps6`` in its
        first two candidates, ``ps1`` and ``ps3`` are removed from the current
        row -- unless the next row starts with ``ps7``/``ps8`` and the current
        row starts with ``ps1``/``ps3``.
      * if ``lcp`` is below 68, ``ps2`` and ``ps18`` are removed.

    Finally ``isIdentified`` is refreshed for every row with a string status:
    ``'Yes'`` for a single candidate, ``'No'`` otherwise.

    Args:
        df: DataFrame with integer row labels (label ``0`` is treated as the
            first row) and the columns ``isIdentified``,
            ``Identification_Status``, ``When_Identified``, ``plb``, ``nlb``,
            ``ssc``, ``lcp``, ``line_no`` and ``data``.

    Returns:
        The same DataFrame, updated in place.
    """
    if len(df.index) == 0:
        return df

    last_label = df.index[-1]

    def closest_non_blank(label, blank_flag, step, edge):
        """Return the label of the nearest non-blank row, or None at the edge."""
        if label == edge:
            return None
        if blank_flag == 'N':
            return label + step
        if label + step == edge:
            return None
        return label + 2 * step

    def candidates(label):
        """Candidate list of a row; ``['blank']`` stands in for a missing row."""
        if label is None:
            return ['blank']
        return df.at[label, 'Identification_Status'].split(";")

    def indent(label):
        """``ssc`` of a row; -1 when there is no such neighbour."""
        return -1 if label is None else df.at[label, 'ssc']

    def show_line(label):
        """Trace the row being decided; printing is best-effort only."""
        line_no = df.at[label, 'line_no']
        data = df.at[label, 'data']
        try:
            print(line_no, data)
        except (UnicodeError, OSError):
            # The console may be unable to encode the script text.
            pass

    def settle(label, code, message):
        """Record the final code for a row together with the deciding stage."""
        print(message)
        show_line(label)
        df.at[label, 'Identification_Status'] = code
        df.at[label, 'When_Identified'] = 'StartIdentifyingTopsPart5'

    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if df.at[index, 'isIdentified'] == 'Yes' or pd.isna(status):
            continue

        prev_label = closest_non_blank(index, df.at[index, 'plb'], -1, 0)
        next_label = closest_non_blank(index, df.at[index, 'nlb'], 1, last_label)
        # "".join is kept on purpose: it also matches statuses that carry
        # empty tokens (e.g. a trailing ';').
        prev_flat = "".join(candidates(prev_label))
        next_pos = candidates(next_label)
        next_top1 = next_pos[0]

        cur_line_pos = status.split(";")
        top1 = cur_line_pos[0]

        ## pnbl is ps5 or 4 , cur top 5 , next top 2 has 6 ; then current is ps5
        # NOTE(review): the original comment speaks of the previous row's
        # indent, but the code has always compared with the next row's indent.
        if (prev_flat in ('ps5', 'ps4') and top1 == 'ps5'
                and df.at[index, 'ssc'] == indent(next_label)):
            next_allows_action = ('ps6' in next_pos[:2]
                                  and next_top1 not in ('ps1', 'ps2'))
            if next_allows_action or next_top1 == 'ps5':
                settle(index, 'ps5', "identifying current as ps5 as between actions")
                continue

        ## pnbl is ps16 , cur top2 has ps1 , next top 2 has 4 or 6 , declare ps1
        if (prev_flat == 'ps16' and 'ps1' in cur_line_pos[:2]
                and ('ps4' in next_pos[:2] or 'ps6' in next_pos[:2])):
            settle(index, 'ps1', "identifying current as ps1 as between transition and action")
            continue

        ## nnbl top4 has no ps4 and top2 no ps6: remove 1,3 from current line
        ## dont remove if next line is identified as speaker and ps1 is top
        if 'ps4' not in next_pos[:4] and 'ps6' not in next_pos[:2]:
            if next_top1 in ('ps7', 'ps8') and top1 in ('ps1', 'ps3'):
                print("not removing ps1 as next is speaker and current top is slugline")
            else:
                kept = [ps for ps in cur_line_pos if ps not in ('ps1', 'ps3')]
                print("Removing ps1 ps3 from current as next does not have ps4 in top4 ps6 in top2")
                show_line(index)
                df.at[index, 'Identification_Status'] = ";".join(kept)

        ## remove ps2 and ps18 as a possibility if the line ends too far left
        # NOTE(review): the trace text says 75 while the threshold is 68; the
        # message is left untouched until the intended value is confirmed.
        cur_line_pos = df.at[index, 'Identification_Status'].split(";")
        if df.at[index, 'lcp'] < 68 and ('ps2' in cur_line_pos or 'ps18' in cur_line_pos):
            print("Removing ps2 ps18 from current as lcp < 75")
            show_line(index)
            kept = [ps for ps in cur_line_pos if ps not in ('ps2', 'ps18')]
            df.at[index, 'Identification_Status'] = ";".join(kept)

    # Refresh the flag (the previous code compared with == and changed nothing).
    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if not isinstance(status, str):
            # Null statuses are skipped, as in the main loop.
            continue
        df.at[index, 'isIdentified'] = 'Yes' if len(status.split(";")) == 1 else 'No'

    return df


def start_top_identifications_part6(df):
    """Settle rows whose first candidate is ``ps1`` and that follow a ``ps17`` row.

    ``Identification_Status`` holds ``;``-separated candidate codes, first
    candidate first.  Every row that is not flagged ``isIdentified == 'Yes'``
    and has a non-null status is compared with its closest non-blank neighbour
    above and below (at most one blank row is skipped, as signalled by
    ``plb`` / ``nlb``; a missing neighbour counts as ``blank``).

    Rule: first candidate ``ps1``, previous row is ``ps17`` and ``ps4`` is
    among the next row's first two candidates  ->  ``ps1`` (the trace message
    describes this as "between special term and action").

    Finally ``isIdentified`` is refreshed for every row with a string status:
    ``'Yes'`` for a single candidate, ``'No'`` otherwise.  Rows whose status
    is not a string are reported with the ``JJJJ`` marker and left untouched.

    Args:
        df: DataFrame with integer row labels (label ``0`` is treated as the
            first row) and the columns ``isIdentified``,
            ``Identification_Status``, ``When_Identified``, ``plb``, ``nlb``,
            ``line_no`` and ``data``.

    Returns:
        The same DataFrame, updated in place.
    """
    if len(df.index) == 0:
        return df

    last_label = df.index[-1]

    def closest_non_blank(label, blank_flag, step, edge):
        """Return the label of the nearest non-blank row, or None at the edge."""
        if label == edge:
            return None
        if blank_flag == 'N':
            return label + step
        if label + step == edge:
            return None
        return label + 2 * step

    def candidates(label):
        """Candidate list of a row; ``['blank']`` stands in for a missing row."""
        if label is None:
            return ['blank']
        return df.at[label, 'Identification_Status'].split(";")

    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if df.at[index, 'isIdentified'] == 'Yes' or pd.isna(status):
            continue

        prev_pos = candidates(closest_non_blank(index, df.at[index, 'plb'], -1, 0))
        next_pos = candidates(closest_non_blank(index, df.at[index, 'nlb'], 1, last_label))

        ## top 1 is ps1 pnbl is 17 nnbl has ps4 in top2
        if (status.split(";")[0] == 'ps1'
                and "".join(prev_pos) == 'ps17'
                and 'ps4' in next_pos[:2]):
            print("identifying current as ps1 as between special term and action")
            line_no = df.at[index, 'line_no']
            data = df.at[index, 'data']
            try:
                print(line_no, data)
            except (UnicodeError, OSError):
                # The console may be unable to encode the script text.
                pass
            # .at writes straight into the frame; chained df[col][i] = v may
            # write to a temporary copy depending on the pandas version.
            df.at[index, 'Identification_Status'] = 'ps1'
            df.at[index, 'When_Identified'] = 'StartIdentifyingTopsPart6'

    # Refresh the flag (the previous code compared with == and changed nothing).
    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if not isinstance(status, str):
            # Same diagnostic marker the previous implementation printed when
            # the status could not be split.
            print("JJJJ", index)
            continue
        df.at[index, 'isIdentified'] = 'Yes' if len(status.split(";")) == 1 else 'No'

    return df


def start_top_identifications_part7(df):
    """Settle rows torn between ``ps15`` and ``ps6`` by looking two rows back.

    ``Identification_Status`` holds ``;``-separated candidate codes, first
    candidate first.  Every row that is not flagged ``isIdentified == 'Yes'``
    and has a non-null status is examined together with its closest non-blank
    previous row (at most one blank row is skipped, as signalled by ``plb``).

    Rule: the first two candidates are ``ps15`` and ``ps6`` (either order),
    the row's ``ssc`` is smaller than the previous row's, and the row before
    the previous one has a smaller ``ssc`` than the previous row
    ->  ``ps15`` (trace message: "possibly followed by speaker").

    The indents are compared numerically.  When there is no row before the
    previous one the rule cannot be evaluated and the row is left as it is.

    Finally ``isIdentified`` is refreshed for every row with a string status:
    ``'Yes'`` for a single candidate, ``'No'`` otherwise.

    Args:
        df: DataFrame with integer row labels (label ``0`` is treated as the
            first row) and the columns ``isIdentified``,
            ``Identification_Status``, ``When_Identified``, ``plb``, ``ssc``,
            ``line_no`` and ``data``.

    Returns:
        The same DataFrame, updated in place.
    """
    if len(df.index) == 0:
        return df

    def indent_less(left, right):
        """Numeric ``left < right``; text comparison only for non-numeric values."""
        try:
            return float(left) < float(right)
        except (TypeError, ValueError):
            # Legacy behaviour, kept for values that cannot be read as numbers.
            return str(left) < str(right)

    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if df.at[index, 'isIdentified'] == 'Yes' or pd.isna(status):
            continue

        ## top 1 and 2 are (ps6 and ps15)
        if status.split(";")[:2] not in (['ps15', 'ps6'], ['ps6', 'ps15']):
            continue

        # Closest non-blank previous row: adjacent when plb == 'N', otherwise
        # one blank row is skipped; none exists at the top of the frame.
        if index == 0:
            continue
        if df.at[index, 'plb'] == 'N':
            pnbl_index = index - 1
        elif index - 1 == 0:
            continue
        else:
            pnbl_index = index - 2

        pnbl_indent = df.at[pnbl_index, 'ssc']
        if not df.at[index, 'ssc'] < pnbl_indent:
            continue

        # Row before the previous one, again skipping at most one blank row.
        back = 1 if df.at[pnbl_index, 'plb'] == 'N' else 2
        ppnbl_index = pnbl_index - back
        if ppnbl_index not in df.index:
            # Too close to the top of the frame to evaluate the rule.
            continue

        if indent_less(df.at[ppnbl_index, 'ssc'], pnbl_indent):
            print("identifying current as ps15 as possibly followed by speaker")
            line_no = df.at[index, 'line_no']
            data = df.at[index, 'data']
            try:
                print(line_no, data)
            except (UnicodeError, OSError):
                # The console may be unable to encode the script text.
                pass
            # .at writes straight into the frame; chained df[col][i] = v may
            # write to a temporary copy depending on the pandas version.
            df.at[index, 'Identification_Status'] = 'ps15'
            df.at[index, 'When_Identified'] = 'StartIdentifyingTopsPart7'

    # Refresh the flag (the previous code compared with == and changed nothing).
    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if not isinstance(status, str):
            # Null statuses are skipped, as in the main loop.
            continue
        df.at[index, 'isIdentified'] = 'Yes' if len(status.split(";")) == 1 else 'No'

    return df


def start_top_identifications_part8(df):
    """Settle rows torn between ``ps4`` and ``ps6`` that directly follow ``ps1``.

    ``Identification_Status`` holds ``;``-separated candidate codes, first
    candidate first.  Every row that is not flagged ``isIdentified == 'Yes'``
    and has a non-null status is compared with its closest non-blank neighbour
    above and below (at most one blank row is skipped, as signalled by
    ``plb`` / ``nlb``; a missing neighbour counts as ``blank``).

    Applies only when the first two candidates are ``ps4`` and ``ps6`` (either
    order) and the previous row is ``ps1``:
      * next row is ``ps6`` and ``nlb == 'N'``  ->  ``ps4``;
      * otherwise, if the next row has no ``ps5`` candidate and does not start
        with ``ps6``  ->  ``ps6`` (trace message: "possibly between slug and
        speaker").

    Finally ``isIdentified`` is refreshed for every row with a string status:
    ``'Yes'`` for a single candidate, ``'No'`` otherwise.

    Args:
        df: DataFrame with integer row labels (label ``0`` is treated as the
            first row) and the columns ``isIdentified``,
            ``Identification_Status``, ``When_Identified``, ``plb``, ``nlb``,
            ``line_no`` and ``data``.

    Returns:
        The same DataFrame, updated in place.
    """
    if len(df.index) == 0:
        return df

    last_label = df.index[-1]

    def closest_non_blank(label, blank_flag, step, edge):
        """Return the label of the nearest non-blank row, or None at the edge."""
        if label == edge:
            return None
        if blank_flag == 'N':
            return label + step
        if label + step == edge:
            return None
        return label + 2 * step

    def candidates(label):
        """Candidate list of a row; ``['blank']`` stands in for a missing row."""
        if label is None:
            return ['blank']
        return df.at[label, 'Identification_Status'].split(";")

    def settle(label, code, message):
        """Record the final code for a row together with the deciding stage."""
        print(message)
        line_no = df.at[label, 'line_no']
        data = df.at[label, 'data']
        try:
            print(line_no, data)
        except (UnicodeError, OSError):
            # The console may be unable to encode the script text.
            pass
        # .at writes straight into the frame; chained df[col][i] = v may write
        # to a temporary copy depending on the pandas version/configuration.
        df.at[label, 'Identification_Status'] = code
        df.at[label, 'When_Identified'] = 'StartIdentifyingTopsPart8'

    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if df.at[index, 'isIdentified'] == 'Yes' or pd.isna(status):
            continue

        ## top 1 and 2 are (ps4 and ps6)
        if status.split(";")[:2] not in (['ps4', 'ps6'], ['ps6', 'ps4']):
            continue

        nlb = df.at[index, 'nlb']
        prev_pos = candidates(closest_non_blank(index, df.at[index, 'plb'], -1, 0))
        if "".join(prev_pos) != 'ps1':
            continue

        next_pos = candidates(closest_non_blank(index, nlb, 1, last_label))
        if "".join(next_pos) == 'ps6' and nlb == 'N':
            settle(index, 'ps4', "identifying current as ps4 ")
        elif 'ps5' not in next_pos and next_pos[0] != 'ps6':
            settle(index, 'ps6',
                   "identifying current as ps6 as possibly between slug and speaker")

    # Refresh the flag (the previous code compared with == and changed nothing).
    for index in df.index:
        status = df.at[index, 'Identification_Status']
        if not isinstance(status, str):
            # Null statuses are skipped, as in the main loop.
            continue
        df.at[index, 'isIdentified'] = 'Yes' if len(status.split(";")) == 1 else 'No'

    return df


#1.1
def decrease_wt_dial_between_action(df):
    """Re-weight the possibilities of unidentified lines that follow an action start.

    For every row that is not yet identified, the possibilities of the previous
    and next non-blank lines are looked up (the adjacent row, or the row after
    it when ``plb`` / ``nlb`` is not ``'N'``).  Two adjustments are then applied:

    * previous top possibility is ``ps4`` (action start) and ``ps14`` (dialogue
      middle) is one of the current top two: the row's ``ps14`` weight is
      lowered by 5;
    * previous top possibility is ``ps4`` and the next top possibility is
      ``ps6``: the row's ``ps5`` weight is raised by 11.

    When a weight changed, the row's possibilities are re-sorted by weight
    (descending) and written to ``Identification_Status_with_weights``
    (``"ps5-12;ps14--2"``) and ``Identification_Status`` (``"ps5;ps14"``).
    Finally ``isIdentified`` is refreshed for every row: ``'Yes'`` when exactly
    one possibility is left, ``'No'`` otherwise.

    Args:
        df: line table; modified in place.  Reads ``isIdentified``,
            ``Identification_Status``, ``plb``, ``nlb`` and the ``ps*`` weight
            columns.  Row labels are used arithmetically (``index - 1``), so
            consecutive integer labels are assumed -- TODO confirm with caller.

    Returns:
        The same ``df`` object.
    """

    def useWeights(ps):
        # Entries look like "ps14-7".  Split on the first '-' only, so that a
        # negative weight ("ps14--5") keeps its sign instead of producing ''.
        return int(ps.split("-", 1)[1])

    for index in df.index:
        if df['isIdentified'][index] == 'Yes' or pd.isna(df['Identification_Status'][index]):
            continue

        # possibilities of the previous non-blank line
        if index == 0:
            pnbl_pos = ['blank']
        elif df['plb'][index] == 'N':
            pnbl_pos = df['Identification_Status'][index - 1].split(";")
            print(pnbl_pos)
        elif index - 1 == 0:
            pnbl_pos = ['blank']
        else:
            pnbl_pos = df['Identification_Status'][index - 2].split(";")

        # possibilities of the next non-blank line
        if index == df.index[-1]:
            nnbl_pos = ['blank']
        elif df['nlb'][index] == 'N':
            nnbl_pos = df['Identification_Status'][index + 1].split(";")
        elif index + 1 == df.index[-1]:
            nnbl_pos = ['blank']
        else:
            nnbl_pos = df['Identification_Status'][index + 2].split(";")

        print("\n")

        cur_line_pos = df['Identification_Status'][index].split(";")
        top1 = cur_line_pos[0]
        top2 = cur_line_pos[1] if len(cur_line_pos) >= 2 else top1

        pnbl_top1 = pnbl_pos[0]
        nnbl_top1 = nnbl_pos[0]
        print("checking dialogue between action",index,pnbl_pos,cur_line_pos,nnbl_pos)

        wt_changed = False

        ## if previous top is action start and current top (top 2) is dia middle
        ## then decrease weight of dialogue middle by 5
        if pnbl_top1 == 'ps4' and (top1 == 'ps14' or top2 == 'ps14'):
            lowered = str(int(df['ps14'][index]) - 5)
            print(lowered)
            # .loc instead of df[col][index] = ...: chained assignment does not
            # reliably write through to the frame.
            df.loc[index, 'ps14'] = lowered
            wt_changed = True

        ## if previous top is action start and next top1 is ps6 then increase wt of ps5 by 11
        if pnbl_top1 == 'ps4' and nnbl_top1 == 'ps6':
            df.loc[index, 'ps5'] = str(int(df['ps5'][index]) + 11)
            wt_changed = True

        if not wt_changed:
            continue

        ## append the weight to the possibilities
        pos_with_weights = []
        for pos in cur_line_pos:
            print(pos)
            try:
                raw = df[pos][index]
            except KeyError:
                # no weight column for this possibility: it is dropped
                continue
            try:
                wt = raw.astype(int)          # numpy scalar
            except (AttributeError, ValueError, TypeError):
                try:
                    wt = int(raw)             # plain str / int
                except (ValueError, TypeError, OverflowError):
                    wt = raw                  # keep whatever is stored
            pos_wt = str(pos) + '-' + str(wt)
            print(pos_wt)
            pos_with_weights.append(pos_wt)

        # now sort in descending order using the weights as key
        pos_with_weights = sorted(pos_with_weights, key=useWeights, reverse=True)
        df.loc[index, 'Identification_Status_with_weights'] = ';'.join(pos_with_weights)

        ## copy over to identification status without the weights but in order of decreasing weights
        line_pos_string = ';'.join(entry.split("-")[0] for entry in pos_with_weights)
        print(line_pos_string)
        df.loc[index, 'Identification_Status'] = line_pos_string

    # Refresh the identified flag.  The previous code used '==' here, which
    # compared and discarded the result, so the flag was never updated.
    for index in df.index:
        status = df['Identification_Status'][index]
        if pd.isna(status):
            continue
        df.loc[index, 'isIdentified'] = 'Yes' if len(status.split(";")) == 1 else 'No'
    return df


def examine_among_two(df):
    """Settle unidentified lines whose remaining possibilities form a known pattern.

    Each row that is not yet identified is checked against these rules, in
    order (the first rule that decides the row ends its processing):

    1. ``ps6``/``ps22`` among the top two and the text contains a full stop:
       if a single word follows the last full stop and the next non-blank line
       is a transition (``ps16``), that word is moved to the front of the
       transition line and the row becomes ``ps6``.
    2. ``ps7``/``ps8`` pair without a parenthetical: speaker (``ps7``).
    3. ``ps1`` or ``ps2`` with ``ps30``: slugline (``ps1``) when ``lcp <= 63``;
       otherwise, when this line and the next are all upper case and the next
       top possibility is ``ps1``/``ps3``, this row becomes ``ps2`` and the
       next one ``ps3``.
    4. ``ps15``/``ps29`` pair with ``lcp <= 51``: ``ps15``.
    5. ``ps13``/``ps9`` pair without a parenthetical: ``ps13``.
    6. ``ps1;ps17`` with the ``ps1`` weight more than 20 above ``ps17``: ``ps1``.
    7. Rows that do not have exactly two possibilities: when ``ps7`` is on top
       and the text starts with a known non-speaker phrase, ``ps7`` and ``ps8``
       are removed; otherwise a leading ``ps3`` is removed when the previous
       non-blank line has no ``ps2`` in its top two.

    Afterwards ``isIdentified`` is refreshed for every row (``'Yes'`` when one
    possibility is left, else ``'No'``).

    Args:
        df: line table; modified in place.  Row labels are used arithmetically
            (``index + 1``), so consecutive integer labels are assumed --
            TODO confirm with caller.

    Returns:
        The same ``df`` object.
    """

    def _trace(*parts):
        # Debug output; printing script text can fail on consoles that cannot
        # encode it, and that must not abort the pass.
        try:
            print(*parts)
        except Exception:
            pass

    # phrases that look like a speaker cue but are not one
    skip_words = ('ON THE SCREEN','ON THE TV','MORNING','AT HOTEL','TV','MONTAGES','MUSICAL MONTAGES','ESSENTIALS','LATER','ESSENTIAL')

    for index in df.index:

        if df['isIdentified'][index] == 'Yes' or pd.isna(df['Identification_Status'][index]):
            continue

        # possibilities of the previous non-blank line
        if index == 0:
            pnbl_pos = ['blank']
        elif df['plb'][index] == 'N':
            pnbl_pos = df['Identification_Status'][index - 1].split(";")
        elif index - 1 == 0:
            pnbl_pos = ['blank']
        else:
            pnbl_pos = df['Identification_Status'][index - 2].split(";")

        # possibilities and row label of the next non-blank line
        if index == df.index[-1]:
            nnbl_pos = ['blank']
            nnbl_index = 'last'
        elif df['nlb'][index] == 'N':
            nnbl_index = index + 1
            nnbl_pos = df['Identification_Status'][nnbl_index].split(";")
        elif index + 1 == df.index[-1]:
            nnbl_pos = ['blank']
            nnbl_index = 'last'
        else:
            nnbl_index = index + 2
            nnbl_pos = df['Identification_Status'][nnbl_index].split(";")

        # Both values get a default when there is no next line; previously
        # nnbl_case was left unbound (or stale from an earlier row) here.
        try:
            nnbl_indent = df['ssc'][nnbl_index]
            nnbl_case = df['case'][nnbl_index]
        except (KeyError, IndexError):
            nnbl_indent = -1
            nnbl_case = ''

        line_no = df['line_no'][index]
        data = df['data'][index]
        cur_line_pos = df['Identification_Status'][index].split(";")
        cur_line_par = df['parenthetical'][index]
        cur_line_case = df['case'][index]

        print("\n")

        top1 = cur_line_pos[0]
        top2 = cur_line_pos[1] if len(cur_line_pos) >= 2 else top1

        pnbl_top1 = pnbl_pos[0]
        pnbl_top2 = pnbl_pos[1] if len(pnbl_pos) >= 2 else pnbl_top1
        if len(pnbl_pos) >= 4:
            print(pnbl_pos[3])

        nnbl_top1 = nnbl_pos[0]

        _trace(index,data,cur_line_case,top1,top2,nnbl_case,nnbl_top1)

        ## if 6 and 22 left
        ## split after full stop to new line.. merge with subsequent transition
        # NOTE(review): parenthesised exactly as Python evaluated the original
        # expression -- the length check only guards the first pair.  If the
        # intent was "exactly ps6 and ps22 left", tighten this condition.
        if (len(cur_line_pos) == 2 and (top1 == 'ps6' or top2 == 'ps22')) or (top2 == 'ps6' or top1 == 'ps22'):
            # literal full stop; the previous regex "." matched any character
            if "." in data:
                print("found full stop,separating")
                parts = data.split(".")
                last = parts[-1]
                _trace(last)
                before = parts[0:-1]
                _trace(" ".join(before))
                print(nnbl_indent)
                if len(last.split()) == 1:
                    print("single word after full stop")
                    if "".join(nnbl_pos) == 'ps16':
                        print("next is transition , merging")
                        nnbl_data = df['data'][nnbl_index]
                        _trace(nnbl_data)
                        nnbl_new_data = last.strip() + ' ' + nnbl_data.strip()
                        _trace(nnbl_new_data)

                        # re-apply the transition line's left indent
                        nnbl_new_data = nnbl_new_data.rjust(len(nnbl_new_data) + int(nnbl_indent))
                        df.loc[nnbl_index, 'data'] = nnbl_new_data

                        print("Splitting current and Identifying current action end")
                        # NOTE(review): the sentences are re-joined with spaces,
                        # so their full stops are dropped -- confirm intended.
                        df.loc[index, 'data'] = " ".join(before)
                        df.loc[index, 'Identification_Status'] = 'ps6'
                        df.loc[index, 'When_Identified'] = 'ExamineLastTwo'
                        continue

        ## if 7 and 8.. make 7 if no parenthetical
        if len(cur_line_pos) == 2 and ((top1 == 'ps7' and top2 == 'ps8') or (top1 == 'ps8' and top2 == 'ps7')):
            if cur_line_par == 'Absent':
                _trace("Identifying as speaker as no parenthtical",data)
                df.loc[index, 'Identification_Status'] = 'ps7'
                df.loc[index, 'When_Identified'] = 'ExamineLastTwo'
                continue

        ## if 1/2 and 30 left keep 1 if lcp <
        if len(cur_line_pos) == 2 and (((top1 == 'ps1' or top1 == 'ps2') and top2 == 'ps30') or (top1 == 'ps30' and top2 == 'ps1')):
            print(index,cur_line_case,nnbl_case,nnbl_top1)
            if df['lcp'][index] <= 63:
                _trace("Identifying as slugline",data)
                df.loc[index, 'Identification_Status'] = 'ps1'
                df.loc[index, 'When_Identified'] = 'ExamineLastTwo'
                # none of the remaining rules can match a ps1/ps2/ps30 pair
                continue
            elif cur_line_case == 'AllUpper' and nnbl_case == 'AllUpper' and (nnbl_top1 == 'ps1' or nnbl_top1 == 'ps3'):
                _trace("Identifying as slugline beginning",data)
                df.loc[index, 'Identification_Status'] = 'ps2'
                df.loc[index, 'When_Identified'] = 'ExamineLastTwo'
                _trace("Identifying as slugline end",df['data'][nnbl_index])
                df.loc[nnbl_index, 'Identification_Status'] = 'ps3'
                df.loc[nnbl_index, 'When_Identified'] = 'ExamineLastTwo'
                continue

        ## if 15 and 29 left keep 15 if lcp <
        if len(cur_line_pos) == 2 and ((top1 == 'ps15' and top2 == 'ps29') or (top1 == 'ps29' and top2 == 'ps15')):
            if df['lcp'][index] <= 51:
                _trace("Identifying as dialogue ending",data)
                df.loc[index, 'Identification_Status'] = 'ps15'
                df.loc[index, 'When_Identified'] = 'ExamineLastTwo'
                continue

        if len(cur_line_pos) == 2 and ((top1 == 'ps13' and top2 == 'ps9') or (top1 == 'ps9' and top2 == 'ps13')):
            if cur_line_par == 'Absent':
                _trace("Identifying as dialogue beginning",data)
                df.loc[index, 'Identification_Status'] = 'ps13'
                df.loc[index, 'When_Identified'] = 'ExamineLastTwo'
                continue

        if len(cur_line_pos) == 2:
            if top1 == 'ps1' and top2 == 'ps17':
                wt1 = int(df['ps1'][index])
                wt17 = int(df['ps17'][index])
                if wt1 - wt17 > 20:
                    print("identifying current as ps1 ")
                    _trace(line_no,data)
                    df.loc[index, 'Identification_Status'] = 'ps1'
                    df.loc[index, 'When_Identified'] = 'ExamineLastTwo'
                    continue
        ### remove ps7 ,8 if in stopwords
        elif top1 == 'ps7':
            print("Checking stop words")
            search_data = data.replace(":","")
            if search_data.strip().startswith(skip_words):
                try:
                    cur_line_pos.remove('ps7')
                    cur_line_pos.remove('ps8')
                except ValueError:
                    # one of the two is not among the possibilities:
                    # the stored status is left untouched
                    print("Could not remove speaker pos")
                else:
                    print("ps7,ps8 removed")
                    df.loc[index, 'Identification_Status'] = ";".join(cur_line_pos)
                    df.loc[index, 'When_Identified'] = 'ExamineSpeakerSkipWords'
                    continue
        ### remove ps3 if pnbl top 2 does not have ps2
        else:
            print("Checking sluglineend")
            if top1 == 'ps3' and 'ps2' not in (pnbl_top1, pnbl_top2):
                cur_line_pos.remove('ps3')
                print("ps3 removed")
                df.loc[index, 'Identification_Status'] = ";".join(cur_line_pos)
                df.loc[index, 'When_Identified'] = 'ExamineSluglineEnd'
                continue

    # Refresh the identified flag.  The previous code used '==' here, which
    # compared and discarded the result, so the flag was never updated.
    for index in df.index:
        status = df['Identification_Status'][index]
        if pd.isna(status):
            continue
        df.loc[index, 'isIdentified'] = 'Yes' if len(status.split(";")) == 1 else 'No'
    return df


def examine_action_using_top2_wt_diff(df):
    """Confirm an action possibility when it clearly outweighs the runner-up.

    Every row except the first and the last that is not yet identified is
    examined.  With ``diff`` = weight of the top possibility minus weight of
    the second one:

    * top is ``ps6``: the row is set to ``ps6`` when its indent (``ssc``) is
      below 25, the next non-blank line is exactly ``ps1`` and ``diff > 15``;
    * top is ``ps5`` and the line is not all upper case: the row is set to
      ``ps5`` when previous, current and next non-blank lines share the same
      indent, the previous line is exactly ``ps4``/``ps5`` or the next is
      exactly ``ps6``/``ps5``, ``diff > 10`` and neither neighbour is all
      upper case.

    ``isIdentified`` is not updated by this pass.

    Args:
        df: line table; modified in place.  Row labels are used arithmetically
            (``index - 1``), so consecutive integer labels are assumed --
            TODO confirm with caller.

    Returns:
        The same ``df`` object.
    """

    def _as_number(value):
        # Weight cells can hold strings (other passes in this file write
        # str(...) into them and read them back with int(...)); str - str
        # would raise TypeError.
        return float(value) if isinstance(value, str) else value

    for index in df.index[1:-1]:
        # rows without a status are skipped, as in the sibling passes
        if df['isIdentified'][index] == 'Yes' or pd.isna(df['Identification_Status'][index]):
            continue

        pnbl_pos = []
        nnbl_pos = []
        pnbl_index = index - 1
        nnbl_index = index + 1

        # previous non-blank line
        if index == 0:
            pnbl_pos = ['blank']
        elif df['plb'][index] == 'N':
            pnbl_pos = df['Identification_Status'][index - 1].split(";")
        elif index - 1 == 0:
            pnbl_pos = ['blank']
        else:
            pnbl_index = index - 2
            pnbl_pos = df['Identification_Status'][pnbl_index].split(";")

        # next non-blank line
        if index == df.index[-1]:
            nnbl_pos = ['blank']
        elif df['nlb'][index] == 'N':
            nnbl_pos = df['Identification_Status'][index + 1].split(";")
        elif index + 1 == df.index[-1]:
            nnbl_pos = ['blank']
        else:
            nnbl_index = index + 2
            nnbl_pos = df['Identification_Status'][nnbl_index].split(";")

        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]
        try:
            pnbl_indent = df['ssc'][pnbl_index]
            pnbl_case = df['case'][pnbl_index]
        except (KeyError, IndexError):
            pnbl_indent = -1
            pnbl_case = ''

        try:
            nnbl_indent = df['ssc'][nnbl_index]
            nnbl_case = df['case'][nnbl_index]
        except (KeyError, IndexError):
            nnbl_indent = -1
            nnbl_case = ''

        cur_line_pos = df['Identification_Status'][index].split(";")
        top1 = cur_line_pos[0]
        top2 = cur_line_pos[1] if len(cur_line_pos) >= 2 else top1

        # with a single possibility top2 is top1, so the difference is 0
        top2_wt_diff = _as_number(df[top1][index]) - _as_number(df[top2][index])

        pnbl_joined = "".join(pnbl_pos)
        nnbl_joined = "".join(nnbl_pos)

        ## if top is 6
        if top1 == 'ps6':
            print("top 2 wt diff",top2_wt_diff)
            if cur_indent < 25 and nnbl_joined == 'ps1' and top2_wt_diff > 15:
                print("identifying as ps6")
                # .loc instead of df[col][index] = ...: chained assignment does
                # not reliably write through to the frame.
                df.loc[index, 'Identification_Status'] = 'ps6'
                df.loc[index, 'When_Identified'] = 'ExamineActionUsingTop2Wt'

        ## if top is 5
        elif top1 == 'ps5' and cur_case != 'AllUpper':
            print("top 2 wt diff",top2_wt_diff)
            same_indent = pnbl_indent == cur_indent and cur_indent == nnbl_indent
            action_neighbour = pnbl_joined in ('ps4', 'ps5') or nnbl_joined in ('ps6', 'ps5')
            if (same_indent and action_neighbour and top2_wt_diff > 10
                    and pnbl_case != 'AllUpper' and nnbl_case != 'AllUpper'):
                print("identifying as ps5")
                df.loc[index, 'Identification_Status'] = 'ps5'
                df.loc[index, 'When_Identified'] = 'ExamineActionUsingTop2Wt'
    return df


def identify_top_as_final(df):
    """Collapse every still-unidentified row to a single possibility.

    The top possibility is taken as final, with one exception: when the top is
    ``ps1`` or ``ps2`` but none of the scene-heading markers (``INT.``,
    ``EXT.``, ``I/E``, ``E/I``, ``EXT-``, ``INT-``) occurs in the first eight
    characters of the stripped text, the second possibility is used instead.
    Rows already marked ``isIdentified == 'Yes'`` are left untouched.
    ``isIdentified`` itself is not changed.

    Args:
        df: line table; ``Identification_Status`` is modified in place.

    Returns:
        The same ``df`` object.
    """
    # The markers are applied as regular expressions, so the '.' in 'INT.' and
    # 'EXT.' matches any character (e.g. "INT HOUSE" also counts).
    # NOTE(review): kept as is because headings without the dot rely on it --
    # confirm before switching to a literal match.
    sp_words3 = ['INT.','EXT.','I/E','E/I','EXT-','INT-']

    #take the top possibility as final
    for index in df.index:
        # decided rows are skipped before their status is parsed
        if df['isIdentified'][index] == 'Yes':
            continue

        cur_line_pos = df['Identification_Status'][index].split(";")
        top1 = cur_line_pos[0]
        top2 = cur_line_pos[1] if len(cur_line_pos) >= 2 else top1

        data = df['data'][index]
        line_start = data.strip()[0:8]
        contains_slug_words = False
        for sp_word in sp_words3:
            print(sp_word)
            if re.search(sp_word, line_start):
                contains_slug_words = True
                break

        # a slugline possibility without a scene-heading marker is not trusted
        if top1 in ('ps1', 'ps2') and not contains_slug_words:
            final_pos = top2
        else:
            final_pos = top1

        # .loc instead of df[col][index] = ...: chained assignment does not
        # reliably write through to the frame.
        df.loc[index, 'Identification_Status'] = final_pos

    return df


def run_audit_on_identified_backup(df,audit_df):

    def correct_case(df,audit_df,index,new_case):
        ##
        line_no = df['line_no'][index]
        print("correcting case to",new_case)

        if new_case == 'AllUpper':
            df['data'][index]  = df['data'][index].upper()
        elif new_case == 'AllLower':
            df['data'][index]  = df['data'][index].lower()

        df['case'][index] = new_case
        #audit_df['case_format'][line_no] = new_case
        audit_df['case_corrected'][line_no] = 'Corrected to ' + str(new_case)


    def correct_left_indent(df,audit_df,index,new_indent):
        ##
        line_no = df['line_no'][index]
        data = df['data'][index]
        data = data.strip()
        print("Correcting left indent to",new_indent)
        df['data'][index]  = data.rjust(len(data)+new_indent)
        df['ssc'][index] = new_indent
        df['lcp'][index] = new_indent + len(data) - 1

        audit_df['left_indent_corrected'][line_no] = 'Left indent Corrected to ' + str(new_indent)

    def correct_right_indent(df,audit_df,index,new_lcp):
        ##
        line_no = df['line_no'][index]
        data = df['data'][index]
        data = data.strip()
        new_indent = 0
        print("Correcting right indent to",83 - new_lcp -1)
        new_indent = new_lcp - len(data) + 1
        df['data'][index]  = data.rjust(len(data) + new_indent)
        df['ssc'][index] = new_indent
        df['lcp'][index] = new_lcp

        audit_df['right_indent_corrected'][line_no] = 'Right indent Corrected to ' + str(83 - new_lcp -1)

    def delete_line_after(df,audit_df,index):
        line_no = df['line_no'][index]
        removed_line_no = df['line_no'][index+1]
        df.drop(index + 1, inplace= True)
        print("line deleted after",line_no)
        print("line no deleted ",removed_line_no)

        audit_df['blank_deleted_after'][line_no] = 'Yes'
        audit_df['line_removed'][removed_line_no] = 'Yes'


    def delete_line_before(df,audit_df,line_no):
        line_no = df['line_no'][index]
        removed_line_no = df['line_no'][index-1]
        df.drop(index - 1, inplace= True)
        print("line deleted before",line_no)

        audit_df['blank_deleted_before'][line_no] = 'Yes'
        audit_df['line_removed'][removed_line_no] = 'Yes'

    def insert_line_after(df,audit_df,index):
        line_no = df['line_no'][index]
        next_line_no = df['line_no'][index+1]
        new_line_no = (line_no + next_line_no) / 2
        if new_line_no in audit_df.index:
            new_line_no = (new_line_no + next_line_no)/2
        print("inserted blank line after ", line_no)
        df.loc[index + 0.25] = np.nan
        df.loc[index + 0.25,'Identification_Status'] = 'blank'
        df.loc[index + 0.25,'case'] = ''
        df.loc[index + 0.25,'plb'] = 'N'
        df.loc[index + 0.25,'nlb'] = 'N'
        df.loc[index + 0.25,'line_no'] = new_line_no

        df['plb'][index + 1] = 'Y'

        audit_df['blank_inserted_after'][line_no] = 'Yes'

        audit_df.loc[new_line_no] = 'No'
        audit_df.loc[new_line_no]['data'] = ''
        audit_df.loc[new_line_no]['data_corrected'] = ''
        audit_df.loc[new_line_no]['line_removed'] = 'No'

        print("line inserted after ",line_no)

    def insert_line_before(df,audit_df,index):
        line_no = df['line_no'][index]
        pvs_line_no = df['line_no'][index-1]
        new_line_no = (line_no + pvs_line_no) / 2
        if new_line_no in audit_df.index:
            new_line_no = (new_line_no + line_no)/2

        print("inserted blank line before",line_no)
        df.loc[index - 0.25] = np.nan
        df.loc[index - 0.25,'Identification_Status'] = 'blank'
        df.loc[index - 0.25,'case'] = 'None'
        df.loc[index - 0.25,'plb'] = 'N'
        df.loc[index - 0.25,'nlb'] = 'N'
        df.loc[index - 0.25,'line_no'] = new_line_no
        df['nlb'][index - 1] = 'Y'

        audit_df['blank_inserted_before'][line_no] = 'Yes'

        audit_df.loc[new_line_no] = ''
        audit_df.loc[new_line_no]['line_removed'] = 'No'


    def check_and_remove_numbers(df,audit_df,index):
        data = df['data'][index]

        start_is_num = True
        scene_num = ''
        ## check if number at start
        while start_is_num:
            sub_num = re.search('\d',data.lstrip())
            if sub_num:
                if sub_num.start() == 0:
                    data = data.replace(sub_num.group(0),'')
                    df['data'][index] = data
                    print(data)
                    scene_num += sub_num.group(0)
                    continue
            start_is_num = False
        print("scene num",scene_num)

    def audit_ps1(df,audit_df,index):

        print("Auditing Slugline")

        cur_indent = df['ssc'][index]
        cur_data = df['data'][index]
        cur_case = df['case'][index]

        try:
            print(cur_data)
        except:
            pass
        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        #check and correct case
        new_case = 'AllUpper'
        if  cur_case != new_case:
            correct_case(df,audit_df,index,new_case)
        else:
            print("Case already",new_case)

        ## plb nlb
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        if plb == 'N':
            if index != 0 :
                insert_line_before(df,audit_df,index)
                df['plb'][index] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'N':
            insert_line_after(df,audit_df,index)
            df['nlb'][index] = 'Y'
        else:
            print("next line already blank")

        ## remove numbers if found at start
        check_and_remove_numbers(df,audit_df,index)


    def audit_ps4(df,audit_df,index):

        print("Auditing Action Beginning")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_data = df['data'][index]
        cur_case = df['case'][index]

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        #check and correct case
    #     new_case = 'AllLower'
    #     if  cur_case != new_case:
    #         correct_case(df,audit_df,index,new_case)
    #     else:
    #         print("Case already",new_case)

        ## plb nlb
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        if plb == 'N':
            insert_line_before(df,audit_df,index)
            df['plb'][index] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df['nlb'][index] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps5(df,audit_df,index):

        print("Auditing Action Middle")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_data = df['data'][index]
        cur_case = df['case'][index]

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        #check and correct case
    #     new_case = 'AllLower'
    #     if  cur_case != new_case:
    #         correct_case(df,audit_df,index,new_case)
    #     else:
    #         print("Case already",new_case)

        ## plb nlb
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        if plb == 'Y':
            delete_line_before(df,audit_df,index)
            df['plb'][index] = 'N'
        else:
            print("previous line already non blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df['nlb'][index] = 'N'
        else:
            print("next line not blank")

        return nl_deleted


    def audit_ps6(df,audit_df,index):

        print("Auditing Action Ending")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_data = df['data'][index]
        cur_case = df['case'][index]

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        #check and correct case
    #     new_case = 'AllLower'
    #     if  cur_case != new_case:
    #         correct_case(df,audit_df,index,new_case)
    #     else:
    #         print("Case already",new_case)

        ## plb nlb
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        if df['Identification_Status'][index - 1] in ('ps4','ps5'):
            if plb == 'Y':
                delete_line_before(df,audit_df,index)
                df['plb'][index] = 'N'
            else:
                print("previous line already non blank")
        else:
            ## later move this to insert line before
            pnbl_line_no = df['pnbl_line_no'][index]
            try:
                pnbl_identified = True if df.loc[df['line_no'] == pnbl_line_no,'isIdentified'] == 'Yes' else False
            except:
                pnbl_identified = False

            if plb == 'N' and pnbl_identified:
                insert_line_before(df,audit_df,index)
                df['plb'][index] = 'Y'
            else:
                print("previous line already blank")


        if nlb == 'N':
            insert_line_after(df,audit_df,index)
            df['nlb'][index] = 'Y'
        else:
            print("next line already blank")


    def audit_ps7(df,audit_df,index):
        """Audit a speaker row (status ``ps7``).

        Requests a left indent of 35 and 'AllUpper' case through the shared
        correction helpers, inserts a blank line before the row when ``plb``
        is 'N' and deletes the line after it when ``nlb`` is 'Y'.

        Returns True when the following row was deleted; the driving loop
        uses that to skip the deleted row's index.
        """
        print("Auditing Speaker")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 35
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        new_case = 'AllUpper'
        if cur_case != new_case:
            correct_case(df,audit_df,index,new_case)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # Flags are written with df.loc: df['plb'][index] = ... is chained
        # assignment, which pandas does not guarantee to reach the frame.
        if plb == 'N':
            insert_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted


    def audit_ps10(df,audit_df,index):
        """Audit a complete parenthetical row (status ``ps10``).

        Requests a left indent of 30 and 'AllLower' case, deletes the line
        before the row when ``plb`` is 'Y' and the line after it when
        ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Parenthetical complete")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 30
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        new_case = 'AllLower'
        if cur_case != new_case:
            correct_case(df,audit_df,index,new_case)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps11(df,audit_df,index):
        """Audit the first row of a multi-line parenthetical (status ``ps11``).

        Requests a left indent of 30 and 'AllLower' case, deletes the line
        before the row when ``plb`` is 'Y' and the line after it when
        ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Parenthetical beginning")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 30
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        new_case = 'AllLower'
        if cur_case != new_case:
            correct_case(df,audit_df,index,new_case)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps20(df,audit_df,index):
        """Audit a middle row of a multi-line parenthetical (status ``ps20``).

        Requests a left indent of 30 and 'AllLower' case, deletes the line
        before the row when ``plb`` is 'Y' and the line after it when
        ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Parenthetical middle")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 30
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        new_case = 'AllLower'
        if cur_case != new_case:
            correct_case(df,audit_df,index,new_case)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps12(df,audit_df,index):
        """Audit the last row of a multi-line parenthetical (status ``ps12``).

        Requests a left indent of 30 and 'AllLower' case, deletes the line
        before the row when ``plb`` is 'Y' and the line after it when
        ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Parenthetical end")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 30
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        new_case = 'AllLower'
        if cur_case != new_case:
            correct_case(df,audit_df,index,new_case)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps13(df,audit_df,index):
        """Audit the first row of a dialogue block (status ``ps13``).

        Requests a left indent of 25 (case is left untouched; the case
        correction for this element was disabled), deletes the line before
        the row when ``plb`` is 'Y' and the line after it when ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Dialogue Beginning")
        nl_deleted = False
        cur_indent = df['ssc'][index]

        new_indent = 25
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps14(df,audit_df,index):
        """Audit a middle row of a dialogue block (status ``ps14``).

        Requests a left indent of 25 (case is left untouched; the case
        correction for this element was disabled), deletes the line before
        the row when ``plb`` is 'Y' and the line after it when ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Dialogue Middle")
        nl_deleted = False
        cur_indent = df['ssc'][index]

        new_indent = 25
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,audit_df,index)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps15(df,audit_df,index):
        """Audit the last row of a dialogue block (status ``ps15``).

        Requests a left indent of 25 (case is left untouched), deletes the
        line before the row when ``plb`` is 'Y', and inserts a blank line
        after it when ``nlb`` is 'N' unless the next row has status 'ps10'.

        Returns None; this handler never deletes the following row.
        """
        print("Auditing Dialogue End")
        cur_indent = df['ssc'][index]

        new_indent = 25
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already not blank")

        if nlb == 'N':
            # Looks one label ahead; raises KeyError when there is no row
            # labelled index+1 (the driving loop swallows that error).
            nl_pos = df['Identification_Status'][index+1]
            if nl_pos == 'ps10':
                print("not inserting blank as next is parenthtical")
            else:
                insert_line_after(df,audit_df,index)
                df.loc[index,'nlb'] = 'Y'
        else:
            print("next line already blank")


    def audit_ps16(df,audit_df,index):
        """Audit a transition row (status ``ps16``).

        Requests that the row's ``lcp`` value become 72 via
        ``correct_right_indent`` and that its case become 'AllUpper', then
        inserts a blank line before the row when ``plb`` is 'N' and after it
        when ``nlb`` is 'N'.

        Returns None.
        """
        print("Auditing Transition")

        cur_case = df['case'][index]
        cur_lcp = df['lcp'][index]

        new_lcp = 72
        if cur_lcp != new_lcp:
            correct_right_indent(df,audit_df,index,new_lcp)
        else:
            print("indent already",new_lcp)

        print(cur_case,"123")
        new_case = 'AllUpper'
        if cur_case != new_case:
            correct_case(df,audit_df,index,new_case)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'N':
            insert_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'N':
            insert_line_after(df,audit_df,index)
            df.loc[index,'nlb'] = 'Y'
        else:
            print("next line already blank")

    def audit_ps17(df,audit_df,index):
        """Audit a special-term row (status ``ps17``).

        Requests a left indent of 15 and 'AllUpper' case, then inserts a
        blank line before the row when ``plb`` is 'N' and after it when
        ``nlb`` is 'N'.

        Returns None.
        """
        print("Auditing Special Term")

        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,audit_df,index,new_indent)
        else:
            print("indent already",new_indent)

        new_case = 'AllUpper'
        if cur_case != new_case:
            correct_case(df,audit_df,index,new_case)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'N':
            insert_line_before(df,audit_df,index)
            df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'N':
            insert_line_after(df,audit_df,index)
            df.loc[index,'nlb'] = 'Y'
        else:
            print("next line already blank")


    # An explicit iterator is kept so the loop body can call next() on it and
    # skip the row that follows the current one.
    index_iter = iter(df.index)


    for index in index_iter:

        # Rows already marked blank have no audit handler.
        if (df['Identification_Status'][index] == 'blank'):
            continue

        nl_deleted = False
        cur_line_pos = df['Identification_Status'][index]
        # Handlers are the nested functions named audit_<Identification_Status>.
        fn_name  = 'audit_' + cur_line_pos
        line_no =  df['line_no'][index]
        print("\n")
        print("line no",line_no)
        print("index ",index)
        print(cur_line_pos)

        # Look the handler up among this function's local names; a status
        # without a matching audit_* function is skipped.
        try:
            to_call_fn = locals()[fn_name]
            print(to_call_fn)
        except:
            continue

        # Any exception raised by a handler is swallowed here, leaving
        # nl_deleted False for this row.
        try:
            nl_deleted = to_call_fn(df,audit_df,index)
        except:
            pass

        # A truthy result means the handler deleted the following row, so its
        # label is consumed from the iterator instead of being visited.
        # NOTE(review): next() would raise StopIteration if no label is left.
        if nl_deleted :
            next(index_iter)


    # Order rows by label and renumber them from 0.
    # NOTE(review): presumably this moves rows inserted under fractional
    # labels into their final position - confirm against the insert helpers.
    df = df.sort_index().reset_index(drop=True)

    #df =  df.sort_values(by=['line_no']).reset_index(drop =True)

    return df


def run_audit_on_identified(df,audit_df = False):

    def correct_case(df,index,new_case,audit_df = False):
        """Rewrite the text of row ``index`` in the requested case.

        ``new_case`` 'AllUpper' upper-cases and 'AllLower' lower-cases the
        ``data`` cell; any other value leaves the text alone. The ``case``
        cell is set to ``new_case`` in every case.

        When ``audit_df`` is a non-empty DataFrame that has a
        ``case_corrected`` column and a row labelled with this row's
        ``line_no``, the correction is recorded there.
        """
        line_no = df['line_no'][index]
        print("correcting case to",new_case)

        # Writes go through df.loc: df['data'][index] = ... is chained
        # assignment and is not guaranteed to modify the frame.
        if new_case == 'AllUpper':
            df.loc[index,'data'] = df['data'][index].upper()
        elif new_case == 'AllLower':
            df.loc[index,'data'] = df['data'][index].lower()

        df.loc[index,'case'] = new_case

        # audit_df defaults to False, so check the type instead of relying on
        # a blanket except to hide the missing .empty attribute.
        if (isinstance(audit_df, pd.DataFrame) and not audit_df.empty
                and 'case_corrected' in audit_df.columns
                and line_no in audit_df.index):
            audit_df.loc[line_no,'case_corrected'] = 'Corrected to ' + str(new_case)

    def correct_left_indent(df,index,new_indent,audit_df= False):
        """Re-indent row ``index`` so its text starts after ``new_indent`` spaces.

        The ``data`` cell is stripped and left-padded with ``new_indent``
        spaces; ``ssc`` is set to ``new_indent`` and ``lcp`` to
        ``new_indent + len(stripped text) - 1``.

        When ``audit_df`` is a non-empty DataFrame that has a
        ``left_indent_corrected`` column and a row labelled with this row's
        ``line_no``, the correction is recorded there.
        """
        line_no = df['line_no'][index]
        data = df['data'][index].strip()
        print("Correcting left indent to",new_indent)

        # Writes go through df.loc: df['data'][index] = ... is chained
        # assignment and is not guaranteed to modify the frame.
        df.loc[index,'data'] = data.rjust(len(data)+new_indent)
        df.loc[index,'ssc'] = new_indent
        df.loc[index,'lcp'] = new_indent + len(data) - 1

        # audit_df defaults to False, so check the type instead of relying on
        # a blanket except to hide the missing .empty attribute.
        if (isinstance(audit_df, pd.DataFrame) and not audit_df.empty
                and 'left_indent_corrected' in audit_df.columns
                and line_no in audit_df.index):
            audit_df.loc[line_no,'left_indent_corrected'] = 'Left indent Corrected to ' + str(new_indent)

    def correct_right_indent(df,index,new_lcp,audit_df=False):
        """Right-align row ``index`` so its last character sits at ``new_lcp``.

        The ``data`` cell is stripped and left-padded with
        ``new_lcp - len(stripped text) + 1`` spaces; ``ssc`` is set to that
        pad width and ``lcp`` to ``new_lcp``. The reported right indent is
        ``83 - new_lcp - 1``.

        When ``audit_df`` is a non-empty DataFrame that has a
        ``right_indent_corrected`` column and a row labelled with this row's
        ``line_no``, the correction is recorded there.
        """
        line_no = df['line_no'][index]
        data = df['data'][index].strip()
        print("Correcting right indent to",83 - new_lcp -1)
        # NOTE(review): this goes negative when the stripped text is longer
        # than new_lcp + 1; rjust then leaves the text unpadded while ssc
        # still records the negative value.
        new_indent = new_lcp - len(data) + 1

        # Writes go through df.loc: df['data'][index] = ... is chained
        # assignment and is not guaranteed to modify the frame.
        df.loc[index,'data'] = data.rjust(len(data) + new_indent)
        df.loc[index,'ssc'] = new_indent
        df.loc[index,'lcp'] = new_lcp

        # audit_df defaults to False, so check the type instead of relying on
        # a blanket except to hide the missing .empty attribute.
        if (isinstance(audit_df, pd.DataFrame) and not audit_df.empty
                and 'right_indent_corrected' in audit_df.columns
                and line_no in audit_df.index):
            audit_df.loc[line_no,'right_indent_corrected'] = 'Right indent Corrected to ' + str(83 - new_lcp -1)

    def delete_line_after(df,index,audit_df=False):
        """Drop the row labelled ``index + 1`` from ``df`` in place.

        Raises KeyError when no row carries that label. When ``audit_df`` is
        a non-empty DataFrame, 'Yes' is recorded in ``blank_deleted_after``
        for the current line number and in ``line_removed`` for the removed
        one, for whichever of those columns/rows exist.
        """
        line_no = df['line_no'][index]
        removed_line_no = df['line_no'][index+1]
        df.drop(index + 1, inplace= True)
        print("line deleted after",line_no)
        print("line no deleted ",removed_line_no)

        # audit_df defaults to False; the audit cells are written with .loc
        # because audit_df[col][row] = ... is chained assignment.
        if isinstance(audit_df, pd.DataFrame) and not audit_df.empty:
            for row, col in ((line_no,'blank_deleted_after'),
                             (removed_line_no,'line_removed')):
                if col in audit_df.columns and row in audit_df.index:
                    audit_df.loc[row,col] = 'Yes'

    def delete_line_before(df,line_no,audit_df=False):
        """Drop the row labelled one below the given row label, in place.

        Despite its name, the second argument is the row *label* (callers
        pass their ``index``); the parameter name is kept for compatibility.
        Raises KeyError when no row carries the label ``index - 1``.

        When ``audit_df`` is a non-empty DataFrame, 'Yes' is recorded in
        ``blank_deleted_before`` for the current line number and in
        ``line_removed`` for the removed one, for whichever of those
        columns/rows exist.
        """
        # Bind the row label explicitly. The body used to read a name `index`
        # that was never a parameter and only resolved through the enclosing
        # function's loop variable.
        index = line_no
        line_no = df['line_no'][index]
        removed_line_no = df['line_no'][index-1]
        df.drop(index - 1, inplace= True)
        print("line deleted before",line_no)

        # audit_df defaults to False; the audit cells are written with .loc
        # because audit_df[col][row] = ... is chained assignment.
        if isinstance(audit_df, pd.DataFrame) and not audit_df.empty:
            for row, col in ((line_no,'blank_deleted_before'),
                             (removed_line_no,'line_removed')):
                if col in audit_df.columns and row in audit_df.index:
                    audit_df.loc[row,col] = 'Yes'

    def insert_line_after(df,index,audit_df=False):
        """Add a blank row after row ``index`` under the label ``index + 0.25``.

        The new row gets status 'blank', empty ``data``/``case``, 'N' for
        both ``plb`` and ``nlb``, and a ``line_no`` halfway between this row
        and the row labelled ``index + 1`` (moved once more towards the next
        line if ``audit_df`` already has that label). The next row's ``plb``
        is set to 'Y'. Raises KeyError when no row is labelled ``index + 1``.

        When ``audit_df`` is a non-empty DataFrame, ``blank_inserted_after``
        is marked for this line and a new audit row is added for the
        inserted line.
        """
        line_no = df['line_no'][index]
        next_line_no = df['line_no'][index+1]
        new_line_no = (line_no + next_line_no) / 2

        # audit_df defaults to False, so test the type explicitly.
        has_audit = isinstance(audit_df, pd.DataFrame) and not audit_df.empty
        if has_audit and new_line_no in audit_df.index:
            new_line_no = (new_line_no + next_line_no)/2

        print("inserted blank line after ", line_no)
        new_row = index + 0.25
        df.loc[new_row] = np.nan
        df.loc[new_row,'data'] = ''
        df.loc[new_row,'Identification_Status'] = 'blank'
        df.loc[new_row,'case'] = ''
        df.loc[new_row,'plb'] = 'N'
        df.loc[new_row,'nlb'] = 'N'
        df.loc[new_row,'line_no'] = new_line_no

        # .loc instead of the chained df['plb'][index + 1] = 'Y'.
        df.loc[index + 1,'plb'] = 'Y'

        if has_audit:
            if 'blank_inserted_after' in audit_df.columns and line_no in audit_df.index:
                audit_df.loc[line_no,'blank_inserted_after'] = 'Yes'

            audit_df.loc[new_line_no] = np.nan
            # audit_df.loc[row]['col'] = v assigns into a temporary row
            # object; address the cell directly so the value is stored.
            for col, value in (('data',''),('data_corrected',''),('line_removed','No')):
                if col in audit_df.columns:
                    audit_df.loc[new_line_no,col] = value

        print("line inserted after ",line_no)

    def insert_line_before(df,index,audit_df=False):
        """Add a blank row before row ``index`` under the label ``index - 0.25``.

        The new row gets status 'blank', empty ``data``, case 'None', 'N'
        for both ``plb`` and ``nlb``, and a ``line_no`` halfway between the
        row labelled ``index - 1`` and this row (moved once more towards this
        line if ``audit_df`` already has that label). The previous row's
        ``nlb`` is set to 'Y'. Raises KeyError when no row is labelled
        ``index - 1``.

        When ``audit_df`` is a non-empty DataFrame, ``blank_inserted_before``
        is marked for this line and a new audit row is added for the
        inserted line.
        """
        line_no = df['line_no'][index]
        pvs_line_no = df['line_no'][index-1]
        new_line_no = (line_no + pvs_line_no) / 2

        # audit_df defaults to False, so test the type explicitly.
        has_audit = isinstance(audit_df, pd.DataFrame) and not audit_df.empty
        if has_audit and new_line_no in audit_df.index:
            new_line_no = (new_line_no + line_no)/2

        print("inserted blank line before",line_no)
        new_row = index - 0.25
        df.loc[new_row] = np.nan
        df.loc[new_row,'Identification_Status'] = 'blank'
        df.loc[new_row,'data'] = ''
        df.loc[new_row,'case'] = 'None'
        df.loc[new_row,'plb'] = 'N'
        df.loc[new_row,'nlb'] = 'N'
        df.loc[new_row,'line_no'] = new_line_no
        # .loc instead of the chained df['nlb'][index - 1] = 'Y'.
        df.loc[index - 1,'nlb'] = 'Y'

        if has_audit:
            if 'blank_inserted_before' in audit_df.columns and line_no in audit_df.index:
                audit_df.loc[line_no,'blank_inserted_before'] = 'Yes'

            audit_df.loc[new_line_no] = np.nan
            # audit_df.loc[row]['col'] = v assigns into a temporary row
            # object; address the cell directly so the value is stored.
            if 'line_removed' in audit_df.columns:
                audit_df.loc[new_line_no,'line_removed'] = 'No'

    def check_and_remove_numbers(df,index,audit_df=False):
        """Strip a leading number from the ``data`` cell of row ``index``.

        Digits at the start of the text (ignoring whitespace, including
        whitespace between successive leading digits) are removed; the
        whitespace itself and everything after the leading run are kept.
        The cell is only rewritten when at least one digit was removed.
        ``audit_df`` is accepted for signature parity and is not used.
        """
        data = df['data'][index]

        # Only the leading run is touched. The earlier implementation called
        # data.replace(digit, '') and so also deleted every later occurrence
        # of each leading digit (e.g. "12 INT. ROOM 21" lost its "21").
        leading = re.match(r'(?:\s*\d)+', data)
        scene_num = ''
        if leading:
            prefix = leading.group(0)
            scene_num = ''.join(re.findall(r'\d', prefix))
            data = re.sub(r'\d', '', prefix) + data[leading.end():]
            df.loc[index,'data'] = data
            print(data)
        print("scene num",scene_num)

    def audit_ps1(df,index,audit_df=False):
        """Audit a slugline row (status ``ps1``).

        Requests a left indent of 15 and 'AllUpper' case, inserts a blank
        line before the row when ``plb`` is 'N' (never for index 0) and after
        it when ``nlb`` is 'N', then strips a leading number from the text.

        Returns None.
        """
        print("Auditing Slugline")

        cur_indent = df['ssc'][index]
        cur_data = df['data'][index]
        cur_case = df['case'][index]

        # Debug output only; a console problem must not abort the audit.
        try:
            print(cur_data)
        except Exception:
            pass

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        new_case = 'AllUpper'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'N':
            if index != 0 :
                insert_line_before(df,index,audit_df)
                df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'N':
            insert_line_after(df,index,audit_df)
            df.loc[index,'nlb'] = 'Y'
        else:
            print("next line already blank")

        ## remove numbers if found at start
        check_and_remove_numbers(df,index,audit_df)


    def audit_ps4(df,index,audit_df=False):
        """Audit the first row of an action block (status ``ps4``).

        Requests a left indent of 15 (case is left untouched; the case
        correction for this element was disabled), inserts a blank line
        before the row when ``plb`` is 'N' and deletes the line after it
        when ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Action Beginning")
        nl_deleted = False
        cur_indent = df['ssc'][index]

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'N':
            insert_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps5(df,index,audit_df=False):
        """Audit a middle row of an action block (status ``ps5``).

        Requests a left indent of 15 (case is left untouched; the case
        correction for this element was disabled), deletes the line before
        the row when ``plb`` is 'Y' and the line after it when ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Action Middle")
        nl_deleted = False
        cur_indent = df['ssc'][index]

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already non blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted


    def audit_ps6(df,index,audit_df=False):
        """Audit the last row of an action block (status ``ps6``).

        Requests a left indent of 15 (case is left untouched). If the row
        labelled ``index - 1`` has status 'ps4' or 'ps5', a line before this
        row is deleted when ``plb`` is 'Y'. Otherwise a blank line is
        inserted before this row when ``plb`` is 'N' and the row whose
        ``line_no`` equals this row's ``pnbl_line_no`` has ``isIdentified``
        equal to 'Yes'. A blank line is inserted after the row when ``nlb``
        is 'N'.

        Returns None.
        """
        print("Auditing Action Ending")
        cur_indent = df['ssc'][index]

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if df['Identification_Status'][index - 1] in ('ps4','ps5'):
            if plb == 'Y':
                delete_line_before(df,index,audit_df)
                df.loc[index,'plb'] = 'N'
            else:
                print("previous line already non blank")
        else:
            ## later move this to insert line before
            pnbl_line_no = df['pnbl_line_no'][index]
            # The selection is a Series, so it must be reduced with .any();
            # using the comparison directly in a conditional always raised
            # ValueError, which left pnbl_identified permanently False.
            try:
                pnbl_status = df.loc[df['line_no'] == pnbl_line_no,'isIdentified']
                pnbl_identified = bool((pnbl_status == 'Yes').any())
            except (KeyError, TypeError, ValueError):
                # e.g. the frame has no 'isIdentified' column
                pnbl_identified = False

            if plb == 'N' and pnbl_identified:
                insert_line_before(df,index,audit_df)
                df.loc[index,'plb'] = 'Y'
            else:
                print("previous line already blank")

        if nlb == 'N':
            insert_line_after(df,index,audit_df)
            df.loc[index,'nlb'] = 'Y'
        else:
            print("next line already blank")

    def audit_ps7(df,index,audit_df=False):
        """Audit a speaker row (status ``ps7``).

        Requests a left indent of 35 and 'AllUpper' case, inserts a blank
        line before the row when ``plb`` is 'N' and deletes the line after it
        when ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Speaker")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 35
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        new_case = 'AllUpper'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'N':
            insert_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted


    def audit_ps8(df,index,audit_df=False):
        """Audit a row with status ``ps8`` using the speaker rules.

        Applies the same steps as the ``ps7`` handler: left indent 35,
        'AllUpper' case, a blank line inserted before the row when ``plb`` is
        'N' and the line after it deleted when ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Speaker")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 35
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        new_case = 'AllUpper'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'N':
            insert_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted


    def audit_ps10(df,index,audit_df=False):
        """Audit a complete parenthetical row (status ``ps10``).

        Requests a left indent of 30 and 'AllLower' case, deletes the line
        before the row when ``plb`` is 'Y' and the line after it when
        ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Parenthetical complete")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 30
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        new_case = 'AllLower'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps11(df,index,audit_df=False):
        """Audit the first row of a multi-line parenthetical (status ``ps11``).

        Requests a left indent of 30 and 'AllLower' case, deletes the line
        before the row when ``plb`` is 'Y' and the line after it when
        ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Parenthetical beginning")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 30
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        new_case = 'AllLower'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps20(df,index,audit_df=False):
        """Audit a middle row of a multi-line parenthetical (status ``ps20``).

        Requests a left indent of 30 and 'AllLower' case, deletes the line
        before the row when ``plb`` is 'Y' and the line after it when
        ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Parenthetical middle")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 30
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        new_case = 'AllLower'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps12(df,index,audit_df=False):
        """Audit the last row of a multi-line parenthetical (status ``ps12``).

        Requests a left indent of 30 and 'AllLower' case, deletes the line
        before the row when ``plb`` is 'Y' and the line after it when
        ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Parenthetical end")
        nl_deleted = False
        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 30
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        new_case = 'AllLower'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps13(df,index,audit_df=False):
        """Audit the first row of a dialogue block (status ``ps13``).

        Requests a left indent of 25 (case is left untouched; the case
        correction for this element was disabled), deletes the line before
        the row when ``plb`` is 'Y' and the line after it when ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Dialogue Beginning")
        nl_deleted = False
        cur_indent = df['ssc'][index]

        new_indent = 25
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps14(df,index,audit_df=False):
        """Audit a middle row of a dialogue block (status ``ps14``).

        Requests a left indent of 25 (case is left untouched; the case
        correction for this element was disabled), deletes the line before
        the row when ``plb`` is 'Y' and the line after it when ``nlb`` is 'Y'.

        Returns True when the following row was deleted so the driving loop
        can skip its index.
        """
        print("Auditing Dialogue Middle")
        nl_deleted = False
        cur_indent = df['ssc'][index]

        new_indent = 25
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already blank")

        if nlb == 'Y':
            delete_line_after(df,index,audit_df)
            nl_deleted = True
            df.loc[index,'nlb'] = 'N'
        else:
            print("next line not blank")

        return nl_deleted

    def audit_ps15(df,index,audit_df=False):
        """Audit the last row of a dialogue block (status ``ps15``).

        Requests a left indent of 25 (case is left untouched), deletes the
        line before the row when ``plb`` is 'Y', and inserts a blank line
        after it when ``nlb`` is 'N' unless the next row has status 'ps10'.

        Returns None; this handler never deletes the following row.
        """
        print("Auditing Dialogue End")
        cur_indent = df['ssc'][index]

        new_indent = 25
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        # The blank-line flags are read after the indent correction.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'Y':
            delete_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'N'
        else:
            print("previous line already not blank")

        if nlb == 'N':
            # Looks one label ahead; raises KeyError when there is no row
            # labelled index+1 (the driving loop swallows that error).
            nl_pos = df['Identification_Status'][index+1]
            if nl_pos == 'ps10':
                print("not inserting blank as next is parenthtical")
            else:
                insert_line_after(df,index,audit_df)
                df.loc[index,'nlb'] = 'Y'
        else:
            print("next line already blank")


    def audit_ps16(df,index,audit_df=False):
        """Audit a transition row (status ``ps16``).

        Right-aligns the row so its ``lcp`` becomes 72 and requests
        'AllUpper' case, then inserts a blank line before the row when
        ``plb`` is 'N' and after it when ``nlb`` is 'N'.

        Returns None.
        """
        print("Auditing Transition")

        cur_case = df['case'][index]
        cur_lcp = df['lcp'][index]

        new_lcp = 72
        if cur_lcp != new_lcp:
            correct_right_indent(df,index,new_lcp,audit_df)
        else:
            print("indent already",new_lcp)

        print(cur_case,"123")
        new_case = 'AllUpper'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'N':
            insert_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'N':
            insert_line_after(df,index,audit_df)
            df.loc[index,'nlb'] = 'Y'
        else:
            print("next line already blank")

    def audit_ps17(df,index,audit_df=False):
        """Audit a special-term row (status ``ps17``).

        Requests a left indent of 15 and 'AllUpper' case, then inserts a
        blank line before the row when ``plb`` is 'N' and after it when
        ``nlb`` is 'N'.

        Returns None.
        """
        print("Auditing Special Term")

        cur_indent = df['ssc'][index]
        cur_case = df['case'][index]

        new_indent = 15
        if cur_indent != new_indent:
            correct_left_indent(df,index,new_indent,audit_df)
        else:
            print("indent already",new_indent)

        new_case = 'AllUpper'
        if cur_case != new_case:
            correct_case(df,index,new_case,audit_df)
        else:
            print("Case already",new_case)

        # The blank-line flags are read after the indent/case corrections.
        plb = df['plb'][index]
        nlb = df['nlb'][index]

        # df.loc replaces the chained df['plb'][index] = ... writes, which
        # pandas does not guarantee to apply to the frame itself.
        if plb == 'N':
            insert_line_before(df,index,audit_df)
            df.loc[index,'plb'] = 'Y'
        else:
            print("previous line already blank")

        if nlb == 'N':
            insert_line_after(df,index,audit_df)
            df.loc[index,'nlb'] = 'Y'
        else:
            print("next line already blank")


    # An explicit iterator is kept so the loop body can call next() on it and
    # skip the row that follows the current one.
    index_iter = iter(df.index)


    for index in index_iter:

        # Rows already marked blank have no audit handler.
        if (df['Identification_Status'][index] == 'blank'):
            continue

        nl_deleted = False
        cur_line_pos = df['Identification_Status'][index]
        # Handlers are the nested functions named audit_<Identification_Status>.
        fn_name  = 'audit_' + cur_line_pos
        line_no =  df['line_no'][index]
        print("\n")
        print("line no",line_no)
        print("index ",index)
        print(cur_line_pos)
        print(df['line_no'].dtype)

        # Look the handler up among this function's local names; a status
        # without a matching audit_* function is skipped.
        try:
            to_call_fn = locals()[fn_name]
            print(to_call_fn)
        except:
            continue

        # Any exception raised by a handler is swallowed here, leaving
        # nl_deleted False for this row.
        try:
            nl_deleted = to_call_fn(df,index,audit_df)
        except:
            pass

        # A truthy result means the handler deleted the following row, so its
        # label is consumed from the iterator instead of being visited.
        # NOTE(review): next() would raise StopIteration if no label is left.
        if nl_deleted :
            next(index_iter)


    # Order rows by label (blank rows are inserted under index +/- 0.25) and
    # renumber them from 0.
    df = df.sort_index().reset_index(drop=True)

    #df =  df.sort_values(by=['line_no']).reset_index(drop =True)

    # Return (df, audit_df) only when a non-empty audit frame was supplied;
    # with the default audit_df=False the .empty lookup fails and the except
    # branch returns df alone.
    try:
        if not audit_df.empty:
            return df,audit_df
        else:
            return df
    except:
        return df


def merge_line_to_para(df):
    """Collapse line-level rows of ``df`` into paragraph-level rows.

    Each input row is classified by its ``Identification_Status`` code and
    emitted as one paragraph; continuation lines that belong to the same
    element (e.g. ps5/ps6 after ps4, ps14/ps15 after ps13) are stripped and
    joined onto the opening line with single spaces.  Slugline codes
    (ps1/ps2/ps3) start a new scene and bump the scene counter; a ps2 line
    absorbs at most one directly following ps3 line.

    Rows are processed in positional order.  Unlike the previous
    implementation, a mergeable line sitting on the very last row no longer
    raises ``KeyError`` from the look-ahead, and the result is built in one go
    instead of through chained per-cell assignment.

    Args:
        df: frame with an ``Identification_Status`` column and a ``data``
            column of strings.

    Returns:
        DataFrame indexed 1..n with the object-typed columns ``para_no``,
        ``scene_no``, ``content`` and ``script_element``.  Rows whose status
        code is not recognised keep ``''`` in every column except ``para_no``.
    """
    # status -> (script_element, starts_new_scene, statuses absorbed, max absorbed)
    merge_rules = {
        'ps1': ('slugline', True, (), None),
        'ps2': ('slugline', True, ('ps3',), 1),
        'ps3': ('slugline', True, (), None),
        'ps4': ('action', False, ('ps5', 'ps6'), None),
        'ps5': ('action', False, ('ps6',), None),
        'ps6': ('action', False, (), None),
        'ps7': ('speaker', False, (), None),
        'ps8': ('speaker', False, (), None),
        'ps9': ('parenthetical', False, (), None),
        'ps10': ('parenthetical', False, (), None),
        'ps11': ('parenthetical', False, ('ps20', 'ps12'), None),
        'ps20': ('parenthetical', False, ('ps12',), None),
        'ps12': ('parenthetical', False, (), None),
        'ps13': ('dialogue', False, ('ps14', 'ps15'), None),
        'ps14': ('dialogue', False, ('ps15',), None),
        'ps15': ('dialogue', False, (), None),
        'ps16': ('transition', False, (), None),
        'ps17': ('special_term', False, (), None),
        'ps27': ('dialogue', False, (), None),
    }
    columns = ['para_no', 'scene_no', 'content', 'script_element']

    statuses = df['Identification_Status'].tolist()
    contents = df['data'].tolist()
    total = len(statuses)

    rows = []
    scene_no = 0
    pos = 0
    while pos < total:
        line_pos = statuses[pos]
        data = contents[pos].strip()
        pos += 1
        para_no = len(rows) + 1

        if line_pos == 'blank':
            rows.append((para_no, scene_no, '', 'blank'))
            continue

        rule = merge_rules.get(line_pos)
        if rule is None:
            # Unrecognised status: keep a placeholder paragraph, as before.
            rows.append((para_no, '', '', ''))
            continue

        script_element, starts_scene, absorbed, max_absorbed = rule

        # Pull in continuation lines; the bounds check replaces the old
        # unguarded df[...][index+1] look-ahead.
        merged = 0
        while (pos < total
               and statuses[pos] in absorbed
               and (max_absorbed is None or merged < max_absorbed)):
            data += ' ' + contents[pos].strip()
            pos += 1
            merged += 1

        if starts_scene:
            scene_no += 1
        rows.append((para_no, scene_no, data, script_element))

    # dtype=object keeps ints and '' placeholders side by side, matching the
    # column types the row-by-row construction used to produce.
    para_df = pd.DataFrame(rows, columns=columns, dtype=object)
    para_df.index = range(1, len(rows) + 1)
    return para_df


def wrap_text(df,audit_df):
    """Re-wrap over-long action, dialogue and parenthetical lines of ``df``.

    First pass (over the incoming index, skipping blank and unidentified
    rows):
      * ``ps1`` lines longer than 58 characters are only printed in wrapped
        form; ``df`` is not changed for them.
      * a ``ps6`` line is joined with the lines above it up to the previous
        blank line; if the joined text exceeds 58 characters it is re-wrapped
        at width 58.
      * a ``ps15`` line is joined with the lines above it up to the previous
        ps7/ps8/ps10/ps12/ps5/ps6 line, the rows are relabelled
        ps13/ps14/ps15, and text longer than 35 characters is re-wrapped at
        width 35.
    When wrapping needs more rows than exist, rows are added under fractional
    index / ``line_no`` values; when it needs fewer, rows above the current
    line are dropped and flagged in ``audit_df['line_removed']``.

    Second pass (after sorting by ``line_no`` and resetting the index):
    ``ps10`` lines longer than 20 characters are re-wrapped at width 20; added
    rows are labelled ps11 or ps20 and the original row becomes ps12.

    Args:
        df: line-level frame; the code uses the columns
            ``Identification_Status``, ``isIdentified``, ``data`` and
            ``line_no`` and reaches neighbouring rows with ``index - 1``
            arithmetic, so it presumably expects a consecutive integer index
            (TODO confirm with callers).  Mutated in place during the passes.
        audit_df: audit frame addressed by ``line_no`` label; modified in
            place (``line_wrapped_at_prescribed_right_indent``,
            ``line_removed``, plus new rows for added lines).

    Returns:
        ``df`` sorted by ``line_no`` with a fresh 0..n-1 index.
    """
    # df.reset_index(inplace=True, drop=True)
    # audit_df.reset_index(inplace=True, drop=True) #df['line_no'] = df['line_no'].astype(str).astype(float)
    index_iter = iter(df.index)
    print("wrapping lines")
    print(df.dtypes)
    for index in index_iter:
        line_pos = df['Identification_Status'][index]
        if line_pos == 'blank' or df['isIdentified'][index] == 'No':
            continue

        data = df['data'][index]
        data =  data.strip()

        print("line no",df['line_no'][index],df['line_no'].dtype)

        # Sluglines: over-long lines are only reported here, df is left untouched.
        if line_pos == 'ps1':
            print("checking Slugline")
            if len(data) > 58:
                print("Need to wrap line")
                print("data 9808",data)
                wrapped_data = textwrap.wrap(data, width = 58)
                for line in wrapped_data:
                    try:
                        print("line 9812:",line)
                    except:
                        pass


        if line_pos == 'ps6':
            action_data = ''
            action_list = []
            print("checking Action line")
            cur_lines_count = 0
            action_index = index
            # Walk upwards from the ps6 line, prepending each line's text,
            # until a blank line (or an unidentified / missing row) is reached.
            while line_pos != 'blank':
                data = df['data'][action_index]
                line_no = df['line_no'][action_index]
                try:
                    print("9827\n",line_pos,line_no,data)
                except:
                    pass
                action_data = data.strip() + ' ' + action_data
                cur_lines_count += 1
                action_list.append(line_no)
                action_index -= 1
                if action_index < 0:
                    break
                try:
                    line_pos = df['Identification_Status'][action_index]
                except:
                    line_pos = ''

                if line_pos == '' or df['isIdentified'][action_index] == 'No':
                    break

            # The paragraph is skipped entirely when the walk ran off the top
            # of the frame or hit an unidentified / missing row.
            if action_index < 0:
                continue

            if line_pos == '' or df['isIdentified'][action_index] == 'No':
                continue
            action_start_index = action_index + 1
            action_data = action_data.strip()
            print("Number of action lines",cur_lines_count)
            if len(action_data) > 58:
                print("Need to wrap line")
                try:
                    print("actiob data:\n",action_data)
                except:
                    pass
                wrapped_data = textwrap.wrap(action_data, width = 58)
                print("Wrapped line 9753")
                wrapped_data_lines_count = len(wrapped_data)
                if cur_lines_count == wrapped_data_lines_count:
                    #can change the original line(s) data
                    print("cur and wrapped number of lines same")
    #                 for i in range(0,cur_lines_count):
    #                     print(wrapped_data[i])
                elif wrapped_data_lines_count > cur_lines_count:
                    lines_to_add = wrapped_data_lines_count - cur_lines_count
                    #Multiple action lines
                    print("will need to create ",lines_to_add," more lines")
                    pvs_line_no = df['line_no'][index-1]  #float
                    cur_line_no = df['line_no'][index]     #float
                    cur_line_index = index
                    pvs_line_index = cur_line_index -1
                    print("in line no 9874")
                    while lines_to_add != 0:
                        new_line_no = 0.0

                        # New rows get a midpoint line_no / index so they fall
                        # between the previous row and the current row.
                        pvs_line_no = df['line_no'][pvs_line_index]  #float
                        new_line_no = (cur_line_no + pvs_line_no ) / 2
                        while new_line_no in audit_df.index:
                            new_line_no = (cur_line_no + new_line_no)/2
                        action_list.append(new_line_no)
                        new_line_index = (cur_line_index + pvs_line_index) /2
                        df.loc[new_line_index] = np.nan
                        df.loc[new_line_index,'line_no'] = new_line_no
                        if df['Identification_Status'][pvs_line_index] == 'blank':
                            df.loc[new_line_index,'Identification_Status']  = 'ps4'
                        else:
                            df.loc[new_line_index,'Identification_Status']  = 'ps5'

                        #cur_line_no = new_line_no
                        pvs_line_index = new_line_index
                        #cur_line_index -= 1
                        lines_to_add -= 1


                elif wrapped_data_lines_count < cur_lines_count:
                    lines_to_remove =  cur_lines_count - wrapped_data_lines_count
                    print("Will need to remove ",lines_to_remove, "lines")
                    remove_index = index -1
                    #pvs_line_no = df['line_no'][remove_index]
                    while lines_to_remove != 0:
                        pvs_line_no = df['line_no'][remove_index]
                        ## remove pvs line
                        df.drop(remove_index, inplace= True)
                        audit_df['line_removed'][pvs_line_no] = 'Yes'
                        action_list.remove(pvs_line_no)
                        remove_index -= 1
                        lines_to_remove -= 1

                action_list.sort()
                print(action_list)
                ## add these lines in the original df
                print("in line no 9914")
                ## now assign the values to these lines
                # action_list is sorted by line_no, so wrapped pieces are
                # handed out top-to-bottom.
                wrapped_index = 0
                for line_no in action_list:
                    df_index = df.index[df['line_no'] == line_no]
                    df['data'][df_index] = wrapped_data[wrapped_index]
                    if line_no not in audit_df.index:
                        audit_df.loc[line_no] = np.nan
                        audit_df.loc[line_no,'data'] = ''
                        audit_df.loc[line_no,'data_corrected'] = ''
                    audit_df['line_wrapped_at_prescribed_right_indent'][line_no] = 'Yes'
                    wrapped_index += 1


            else:
                print("No need to wrap line")
                try:
                    print(action_data)
                except:
                    pass
                print(len(action_data))
            print("in line no 9936")

        if line_pos == 'ps15':
            dialogue_data = ''
            dialogue_list = []
            print("\n checking Dialogue line",index)
            cur_lines_count = 0
            dialogue_index = int(index)
            # Walk upwards from the ps15 line, prepending text and relabelling
            # rows, until a speaker / parenthetical / action status is found.
            while line_pos not in ('ps7','ps8','ps10','ps12','ps5','ps6'): ## added 5 and 6 as wrong identification causes previous line to be ps5
                data = df['data'][dialogue_index]
                line_no = df['line_no'][dialogue_index]
                try:
                    print(dialogue_index,line_no,line_pos,data)
                except:
                    pass

                #dialogue_data = data.strip() + ' ' + dialogue_data
                try:
                    dialogue_data = data.strip() + ' ' + dialogue_data
                except:
                    data = str(data)
                    dialogue_data = data.strip() + ' ' + dialogue_data

                cur_lines_count += 1
                if dialogue_index == index:
                    df['Identification_Status'][dialogue_index] = 'ps15'
                else:
                    df['Identification_Status'][dialogue_index] = 'ps14'
                dialogue_index -= 1
                dialogue_list.append(line_no)
                print("\nprinting isIdentified: ")
                # NOTE(review): the break below sits inside the except branch,
                # so it only runs when the isIdentified lookup itself fails;
                # an existing row marked 'No' does not stop the walk here.
                # Confirm this is intended.
                try:
                    li = df['isIdentified'][dialogue_index] == 'No'
                    print("dialogue bunch not fully identified")
                except:
                    li = ''
                    print("dialogue bunch not fully identified")
                    if li == '' or df['isIdentified'][dialogue_index] == 'No':
                        break

                # if df['isIdentified'][dialogue_index] == 'No' :
                #     print("dialogue bunch not fully identified")
                #     break
                line_pos = df['Identification_Status'][dialogue_index]

            if li == '' or  df['isIdentified'][dialogue_index] == 'No' :   #added li == ''
                print("dialogue bunch not fully identified")
                continue


            # The topmost row of a multi-line dialogue becomes ps13.
            dialogue_start_index = dialogue_index + 1
            if dialogue_start_index != index:
                df['Identification_Status'][dialogue_start_index] = 'ps13'
            dialogue_data = dialogue_data.strip()
            print("Number of dialogue lines 9990",cur_lines_count)
            if len(dialogue_data) > 35:
                print("Need to wrap dialogue line 9992")
                try:
                    print(dialogue_data)
                except:
                    pass
                wrapped_data = textwrap.wrap(dialogue_data, width = 35)
                
                wrapped_data_lines_count = len(wrapped_data)
                if cur_lines_count == wrapped_data_lines_count:
                    #can change the original line(s) data
                    print("cur and wrapped number of lines same")
    #                 for i in range(0,cur_lines_count):
    #                     print(wrapped_data[i])
                elif wrapped_data_lines_count > cur_lines_count:
                    lines_to_add = wrapped_data_lines_count - cur_lines_count
                    #Multiple action lines
                    print("will need to create ",lines_to_add," more lines")
                    pvs_line_no = float(df['line_no'][index-1])
                    cur_line_no = float(df['line_no'][index])
                    cur_line_index = index
                    pvs_line_index = cur_line_index -1

                    while lines_to_add != 0:
                        new_line_no = 0.0

                        pvs_line_no = float(df['line_no'][pvs_line_index])
                        new_line_no = (cur_line_no + pvs_line_no ) / 2

                        # Keep halving towards the current line until the
                        # candidate line_no is unused.
                        while (new_line_no in audit_df.index) or (new_line_no in dialogue_list):
                            new_line_no = (cur_line_no + new_line_no)/2
                        new_line_no = new_line_no
                        print(cur_line_index,cur_line_no,pvs_line_no,new_line_no)
                        dialogue_list.append(new_line_no)
                        new_line_index = (cur_line_index + pvs_line_index) /2
                        df.loc[new_line_index] = np.nan
                        df.loc[new_line_index,'line_no'] = new_line_no
                        if df['Identification_Status'][pvs_line_index] in ('ps7','ps10','ps12'):
                            df.loc[new_line_index,'Identification_Status']  = 'ps13'
                        else:
                            df.loc[new_line_index,'Identification_Status']  = 'ps14'

                        #cur_line_no = new_line_no
                        #cur_line_index -= 1
                        pvs_line_index = new_line_index
                        lines_to_add -= 1


                elif wrapped_data_lines_count < cur_lines_count:
                    lines_to_remove =  cur_lines_count - wrapped_data_lines_count
                    print("Will need to remove ",lines_to_remove, "lines")
                    remove_index = index -1
                    #pvs_line_no = df['line_no'][remove_index]
                    while lines_to_remove != 0:
                        pvs_line_no = df['line_no'][remove_index]
                        ## remove pvs line
                        df.drop(remove_index, inplace= True)
                        audit_df['line_removed'][pvs_line_no] = 'Yes'
                        dialogue_list.remove(pvs_line_no)
                        remove_index -= 1
                        lines_to_remove -= 1
                # Sorting can fail when line numbers of mixed types were
                # collected; fall back to converting them to float first.
                try:
                    dialogue_list.sort()
                except:
                    print("converting dialogue_list to float")
                    dialogue_list = [float(value) if type(value) != int else value for value in dialogue_list]
                    dialogue_list.sort()
                print("diaogue_list",dialogue_list)
                ## add these lines in the original df
                print("in line no 10060")
                ## now assign the values to these lines
                wrapped_index = 0
                for line_no in dialogue_list:
                    df_index = df.index[df['line_no'] == line_no]
                    df['data'][df_index] = wrapped_data[wrapped_index]
                    if line_no not in audit_df.index:
                        audit_df.loc[line_no] = 'No'
                        audit_df.loc[line_no,'data'] = ''
                        audit_df.loc[line_no,'data_corrected'] = ''
                    audit_df['line_wrapped_at_prescribed_right_indent'][line_no] = 'Yes'
                    wrapped_index += 1


            else:
                print("No need to wrap line")
                try:
                    print(dialogue_data)
                except:
                    pass
                print(len(dialogue_data))

    
    # if all(isinstance(val, int) for val in df['line_no']):
    #     print("All values in 'line_no' are integers.")
    # elif all(isinstance(val, str) for val in df['line_no']):
    #     print("All values in 'line_no' are strings. Converting to floats or integers...")

    #try:
    #    df['line_no'] = df['line_no'].astype(int)
    #    print("Converted 'line_no' column to integers.")
    #except ValueError:
    #    df['line_no'] = df['line_no'].astype(float)
    #    print("Converted 'line_no' column to floats.")
    # else:
    #     print("Values in 'line_no' are of mixed types.")

    # Put the rows added under fractional indexes into reading order and
    # renumber the index before the parenthetical pass.
    df =  df.sort_values(by=['line_no']).reset_index(drop =True)


    index_iter = iter(df.index)
    # Rows inserted above were filled with NaN; give them empty text so
    # .strip() below works.
    df.fillna({'data':''},inplace=True)
    for index in index_iter:
        print(index)
        line_pos = df['Identification_Status'][index]
        if line_pos == 'blank':
            continue

        data = df['data'][index]
        try:
            print("data",data)
            print(type(data))
        except:
            pass
        
        data =  data.strip()

        if line_pos == 'ps10' :
            par_data = ''
            par_list = []
            print("checking Parenthetical line")
            cur_lines_count = 0
            par_index = index

            # Only the single ps10 row is considered here (no upward walk).
            data = df['data'][par_index]
            line_no = df['line_no'][par_index]
            try:
                print(line_pos,data)
            except:
                pass
            par_data = data.strip()
            cur_lines_count += 1

            par_list.append(line_no)
            line_pos = df['Identification_Status'][par_index]

            print("Number of parenthetical lines",cur_lines_count)
            print("index",par_index,"line_no",line_no)

            if len(par_data) > 20:
                print("Need to wrap parenthetical line 10133")
                try:
                    print(par_data)
                except:
                    pass
                wrapped_data = textwrap.wrap(par_data, width = 20)
                wrapped_data_lines_count = len(wrapped_data)

                if wrapped_data_lines_count > cur_lines_count:
                    lines_to_add = wrapped_data_lines_count - cur_lines_count
                    #Multiple par lines
                    print("will need to create ",lines_to_add," more lines")
                    pvs_line_no = df['line_no'][index-1]       #float
                    cur_line_no = df['line_no'][index]         #float
                    cur_line_index = index
                    pvs_line_index = cur_line_index -1

                    while lines_to_add != 0:

                        new_line_no = 0.0
                        pvs_line_no = df['line_no'][pvs_line_index]
                        try:
                            new_line_no = (cur_line_no + pvs_line_no ) / 2
                        except:
                            new_line_no = (float(cur_line_no) + pvs_line_no ) / 2

                        while new_line_no in audit_df.index:
                            new_line_no = (cur_line_no + new_line_no)/2
                        new_line_no = (new_line_no)
                        par_list.append(new_line_no)
                        new_line_index = (cur_line_index + pvs_line_index) /2
                        df.loc[new_line_index] = np.nan
                        df.loc[new_line_index,'line_no'] = new_line_no
                        if df['Identification_Status'][pvs_line_index] in ('ps7','ps8','ps15'):
                            df.loc[new_line_index,'Identification_Status']  = 'ps11'
                            df.loc[new_line_index,'isIdentified']  = 'Yes'
                        else:
                            df.loc[new_line_index,'Identification_Status']  = 'ps20'
                            df.loc[new_line_index,'isIdentified']  = 'Yes'

                        # Unlike the action/dialogue passes, the "current" end
                        # moves to the new row, so later rows land above it.
                        cur_line_no = new_line_no
                        cur_line_index = new_line_index
                        lines_to_add -= 1


                # The original ps10 row becomes the last line of the wrapped
                # parenthetical.
                df['Identification_Status'][index] = 'ps12'
                try:
                    par_list.sort()
                except :
                    print("exception accepted:")
                    par_list = [np.array([float(x)]) if isinstance(x, str) else x for x in par_list]
                    par_list.sort()

                print("\n\npar_list:",par_list,"\n\n")
                ## add these lines in the original df

                ## now assign the values to these lines
                wrapped_index = 0
                for line_no in par_list:
                    try:
                        df_index = df.index[df['line_no'] == line_no]
                        print("try block executed\n")
                    except:
                        print("Exception:")
                        df_index = df.index[df['line_no'] == line_no[0]]
                        print("except block executed\n")

                    print("printing df_index 10200",df_index,"\n")
                    df['data'][df_index] = wrapped_data[wrapped_index]
                    print("printing audit_df:\n",audit_df.index,"\n")
                    print("checking the audit_fd:",line_no,"\n")

        
                    # NOTE(review): audit_df.loc[line_no]['data'] = '' is a
                    # chained assignment and may write to a temporary copy;
                    # the other passes use audit_df.loc[line_no,'data'].
                    try:
                        if line_no not in audit_df.index:
                            audit_df.loc[line_no] = np.nan
                            audit_df.loc[line_no]['data'] = ''
                            audit_df.loc[line_no]['data_corrected'] = ''
                            print("###########try############")
                    except Exception as e:
                        print("Exception accepted:",e)

                    audit_df['line_wrapped_at_prescribed_right_indent'][line_no] = 'Yes'
                    wrapped_index += 1


            else:
                print("No need to wrap line")
                try:
                    print(par_data)
                except:
                    pass
                print(len(par_data))

    # Final ordering; if sorting fails, string line numbers are converted to
    # float and the sort is retried.
    try:
        df =  df.sort_values(by=['line_no']).reset_index(drop =True)
    except:
        print("Exception 10184:")
        df['line_no'] = [np.float64(val) if isinstance(val, str) else val for val in df['line_no']]
        df =  df.sort_values(by=['line_no']).reset_index(drop =True)
    print("The df in merge_text123456789")
    print(df)
    return df


def check_slug_still_unidentified(df):
    """Report whether any unidentified row still looks like a slugline.

    Rows with ``isIdentified == 'No'`` carry a ``;``-separated list of
    candidate status codes in ``Identification_Status``.  Only the first two
    candidates of each row are inspected; if either is ps1, ps2 or ps18 the
    function returns True immediately.  Rows whose status cannot be split
    (e.g. a non-string value) are treated as having no candidates.

    Returns:
        True if such a row exists, otherwise False.
    """
    slug_codes = ['ps1','ps2','ps18']

    print("checking if all slugs were identified")
    unidentified = df.loc[df['isIdentified'] == 'No',:]

    for row_label in unidentified.index:
        try:
            candidates = unidentified['Identification_Status'][row_label].split(';')
            print(candidates)
            candidates = candidates[0:2]
            print("top2 line pos",candidates)
        except:
            # Anything that is not a splittable string counts as "no candidates".
            candidates = []

        if any(code in slug_codes for code in candidates):
            return True

    return False


def sa_wrapped_output_to_docx(para_df,output_docx):
    """Render paragraph-level script rows into a new .docx file.

    A fresh document is created in 12pt Courier New on a 297mm-high,
    8.57in-wide page with a 1.5in left margin.  One paragraph is added per
    row of ``para_df`` (blank rows yield an empty paragraph); the paragraph's
    left/right indent depends on the row's ``script_element`` and transitions
    are additionally right-aligned.  The result is saved to ``output_docx``.

    Args:
        para_df: frame with ``script_element`` and ``content`` columns.
        output_docx: destination passed to ``Document.save``.
    """
    # script_element -> (left indent, right indent), both in inches
    indent_by_element = {
        'slugline': (0, 0),
        'action': (0, 0),
        'dialogue': (1.0, 1.25),
        'parenthetical': (1.5, 2.25),
        'speaker': (2, 1),
        'transition': (2.5, 0),
        'special_term': (0, 0),
    }

    output_doc = Document()

    normal_font = output_doc.styles['Normal'].font
    normal_font.name = 'Courier New'
    normal_font.size = Pt(12)

    first_section = output_doc.sections[0]
    first_section.page_height = Mm(297)
    #section.page_width = Mm(210)
    a4_right = 8.57
    first_section.page_width = Inches(a4_right)
    first_section.left_margin = Inches(1.5)

    for row_label in para_df.index:
        para = output_doc.add_paragraph()

        # Single-spaced 12pt lines with no extra gap between paragraphs.
        fmt = para.paragraph_format
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)
        fmt.line_spacing = Pt(12)

        script_element = para_df['script_element'][row_label]
        content = para_df['content'][row_label]

        # Blank rows keep the empty paragraph that was just added.
        if script_element == 'blank':
            continue

        if script_element == 'transition':
            para.alignment = WD_ALIGN_PARAGRAPH.RIGHT

        if script_element in indent_by_element:
            left_in, right_in = indent_by_element[script_element]
            fmt.left_indent = Inches(left_in)
            fmt.right_indent = Inches(right_in)

        para.text = content

    output_doc.save(output_docx)


def sa_output_to_docx(df,output_docx,output_template):
    """Render an audited script DataFrame into a .docx based on a template.

    The first paragraph of the template receives an opening "FADE IN:" when
    the script starts with a slugline (optionally preceded by one blank
    line). Every row of ``df`` then becomes one paragraph, sluglines
    ('ps1'/'ps2') are numbered on both sides unless
    ``check_slug_still_unidentified(df)`` is truthy, and a closing
    "FADE OUT:" paragraph is appended before saving.

    Page breaks are inserted near the bottom of a page (after 56 or 57
    emitted lines) so that a speaker line ('ps7'/'ps8') or a slugline does
    not end up as the last line of a page; the line counter wraps at 58.

    Args:
        df: DataFrame with ``Identification_Status`` and ``data`` columns.
            The opening check looks up the labels 0 and 1, and the
            look-ahead uses ``index + 1``, so integer labels are assumed.
        output_docx: Destination passed to ``Document.save``.
        output_template: Template opened with ``Document``; it must contain
            at least one paragraph.
    """
    slug_codes = ('ps1', 'ps2')
    speaker_codes = ('ps7', 'ps8')
    left_indent = 12  # spaces padded onto a numbered slugline, sliced off again below

    output_doc = Document(output_template)
    font = output_doc.styles['Normal'].font
    font.name = 'Courier New'
    font.size = Pt(12)

    section = output_doc.sections[0]
    section.page_height = Mm(297)
    section.page_width = Mm(210)
    section.left_margin = Inches(1.5)

    def add_compact_paragraph():
        # New paragraph with no spacing around it and an exact 12pt pitch.
        new_para = output_doc.add_paragraph()
        fmt = new_para.paragraph_format
        fmt.space_before = Pt(0)
        fmt.space_after = Pt(0)
        fmt.line_spacing = Pt(12)
        return new_para

    paragraph = output_doc.paragraphs[0]
    first_format = paragraph.paragraph_format
    first_format.space_before = Pt(0)
    first_format.space_after = Pt(0)
    first_format.line_spacing = Pt(12)

    slug_still_unidentified = check_slug_still_unidentified(df)
    print(slug_still_unidentified)

    statuses = df['Identification_Status']

    def status_at(label):
        # None when the label does not exist (empty/one-row frame, or one
        # past the last row), instead of raising KeyError.
        return statuses[label] if label in statuses.index else None

    lines_added = 0
    if status_at(0) == 'blank' and status_at(1) in slug_codes:
        # Script already starts with a blank line: only add FADE IN.
        paragraph.add_run("FADE IN:")
        lines_added = 1
    elif status_at(0) in slug_codes:
        # Script starts directly with a slugline: add FADE IN plus a blank.
        paragraph.add_run("FADE IN:")
        paragraph.add_run().add_break()
        lines_added = 2

    scene_no = 1

    for index in df.index:
        pos = statuses[index]

        if lines_added == 56:
            if pos in speaker_codes:
                # Break only when the speaker is followed by an extension or
                # parenthetical ('ps9'/'ps10'), to keep them on one page.
                if status_at(index + 1) in ('ps9', 'ps10'):
                    output_doc.add_page_break()
                    lines_added = 0
            elif pos in slug_codes:
                # Do not leave a slugline near the bottom of the page.
                output_doc.add_page_break()
                lines_added = 0

        if lines_added == 57:
            # Neither a speaker nor a slugline may be the last line of a page.
            if pos in speaker_codes or pos in slug_codes:
                output_doc.add_page_break()
                lines_added = 0

        if lines_added == 58:
            lines_added = 0

        data = df['data'][index]
        try:
            print(index, data)
        except Exception:
            # Best-effort debug output; presumably guards against console
            # encoding errors -- it must never abort the export.
            pass

        if not slug_still_unidentified and pos in slug_codes:
            scene_data = str(scene_no)
            print("Removing already present scene number")
            print("Adding scene number")

            # Pad the scene number so the slug text always starts three
            # characters in: "9  INT." / "10 INT." / "100INT.".
            if scene_no < 10:
                data = scene_data + '  ' + data.lstrip()
            elif scene_no < 100:
                data = scene_data + ' ' + data.lstrip()
            else:
                data = scene_data + data.lstrip()

            data = data.rjust(len(data) + left_indent)
            data = data.rstrip()

            # Repeat the scene number on the right, aligned against a
            # 63-character text width.
            scene_indent = 63 - len(data.strip())
            data = data + scene_data.rjust(scene_indent)
            scene_no += 1

            para = add_compact_paragraph()
            # Negative indent pulls the numbered slugline into the margin.
            para.paragraph_format.left_indent = -Inches(0.3)
            para.text = data[left_indent:]
        else:
            para = add_compact_paragraph()
            # Non-blank lines lose their first 15 characters (presumably the
            # plain-text left margin; sa_output_to_txt re-adds 15 spaces).
            if data.strip():
                para.text = data[15:]
            else:
                para.text = ''

        lines_added += 1

    # Closing FADE OUT, right-justified exactly as before (width 58 - len).
    data = "FADE OUT:"
    data = data.rjust(58 - len(data))
    add_compact_paragraph().text = data

    output_doc.save(output_docx)


# def sa_txt_to_docx(script_txt,output_script_docx):

#     output_template_name = 'ScriptTemplate5.docx'
#     output_template = os.path.join(mypath,output_template_name)

#     new_doc = Document(output_template)
#     style = new_doc.styles['Normal']
#     font = style.font
#     font.name = 'Courier New'
#     font.size = Pt(12)
#     section = new_doc.sections[0]
#     section.page_height = Mm(297)
#     section.page_width = Mm(210)
#     #section.page_width = Inches(11)
#     section.left_margin = Inches(1.5)
#     header = section.header

#     with open(script_txt,'r',encoding='utf-8') as txt_in:
#         lines = txt_in.readlines()
#         for line in lines:
#             para = new_doc.add_paragraph()
#             paragraph_format = para.paragraph_format

#             paragraph_format.space_before = Pt(0)
#             paragraph_format.space_after = Pt(0)

#             paragraph_format.line_spacing = Pt(12)

#             if line.strip():
#                 para.text = line[15:]
#             else:
#                 para.text = ''

#     new_doc.save(output_script_docx)

def sa_output_to_txt(output_script_docx,output_script_txt):
    """Dump the paragraphs of a .docx script to a fixed-margin text file.

    Each paragraph is split on ``'\\n'`` and every resulting line is written
    with leading spaces: a constant 15-space margin plus ten spaces per inch
    of the paragraph's own (first-line + left) indent. A document without
    paragraphs yields an empty output file.

    Args:
        output_script_docx: Path (or file-like object) of the .docx to read.
        output_script_txt: Path of the UTF-8 text file to write.
    """
    from docx import Document

    left_margin = 15

    def indent_inches(fmt, attribute):
        # An indent that is not set on the paragraph is None (no `.inches`);
        # that, and any indent that cannot be read, counts as 0.
        try:
            return getattr(fmt, attribute).inches
        except Exception:
            return 0

    def debug_print(value):
        # Best-effort console echo; presumably guards against encoding
        # errors on the console -- it must never abort the export.
        try:
            print(value)
        except Exception:
            pass

    all_paras = Document(output_script_docx).paragraphs
    print(len(all_paras))

    with open(output_script_txt, 'w', encoding='utf-8') as f:
        for para in all_paras:
            paragraph_format = para.paragraph_format
            fli = indent_inches(paragraph_format, 'first_line_indent')
            li = indent_inches(paragraph_format, 'left_indent')
            ri = indent_inches(paragraph_format, 'right_indent')

            # Ten characters per inch of indent.
            indent = int((fli + li) * 10)
            print(fli, li, indent, ri)

            lines = para.text.split('\n')
            print(len(lines))
            for line in lines:
                debug_print(line)
                line = line.rjust(len(line) + indent + left_margin)
                debug_print(line)

                f.write(line)
                f.write('\n')

def print_audit_report_docx(audit_df,audit_report_docx):
    """Write a line-by-line audit report to a .docx file.

    After a centred " Audit Report" title, one paragraph is produced per row
    of ``audit_df`` holding the line number (the row label), the current
    data and -- unless the row is flagged ``line_removed == 'Yes'`` -- the
    corrected data followed by a numbered list of the changes recorded in
    the per-change columns. A row without any recorded change gets
    "No Changes Done".

    Args:
        audit_df: DataFrame with ``data``, ``data_corrected``,
            ``line_removed`` and the change columns listed below.
        audit_report_docx: Destination passed to ``Document.save``.
    """
    # Change columns in report order, each with the function that turns the
    # cell value into the comment text. A cell equal to 'No' means the
    # change did not happen.
    change_rules = (
        ('left_indent_corrected', lambda cell: cell),
        ('right_indent_corrected', lambda cell: cell),
        ('case_corrected', lambda cell: 'Case ' + cell),
        ('line_wrapped_at_prescribed_right_indent',
         lambda cell: 'Line Wrapped at Prescribed Right Indent'),
        ('line_broken_into_multiple_lines',
         lambda cell: 'Line Broken into Multiple Lines'),
        ('line_merged_with_next_line',
         lambda cell: 'Line Merged with Next Line'),
    )

    def write_line(paragraph, text):
        # One run holding `text` followed by a single line break.
        new_run = paragraph.add_run()
        new_run.text = text
        new_run.add_break()
        return new_run

    report = Document()

    title_para = report.add_paragraph()
    title_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # Title followed by two line breaks.
    write_line(title_para, ' Audit Report').add_break()

    for index in audit_df.index:
        entry = report.add_paragraph()

        # Same call order as before: break, text, break, break.
        line_no_text = "Line No: " + str(index)
        line_no_run = entry.add_run()
        line_no_run.add_break()
        line_no_run.text = line_no_text
        line_no_run.add_break()
        line_no_run.add_break()

        write_line(entry, "Current Data: " + audit_df['data'][index])

        if audit_df['line_removed'][index] == 'Yes':
            # Removed lines have no corrected text or change list.
            write_line(entry, "Line was removed")
            continue

        write_line(entry, "Corrected Data: " + audit_df['data_corrected'][index])
        write_line(entry, "Changes Done:- ")

        serial = 1
        for column, describe in change_rules:
            cell = audit_df[column][index]
            if cell != 'No':
                write_line(entry, str(serial) + '. ' + describe(cell))
                serial += 1

        if serial == 1:
            # The counter never advanced, so nothing was recorded.
            write_line(entry, 'No Changes Done')

    report.save(audit_report_docx)

def ps_to_script_element(ps):
    """Translate an identification-status code into its script element name.

    Args:
        ps: Status code such as ``'ps1'`` ... ``'ps17'`` or ``'ps20'``.

    Returns:
        The display name of the script element, or ``''`` for any code that
        has no mapping. ``'ps0'`` is among the unmapped codes (a former
        'Title Lines' mapping is disabled).
    """
    element_groups = (
        ('Slugline', ('ps1', 'ps2', 'ps3')),
        ('Action', ('ps4', 'ps5', 'ps6')),
        ('Speaker', ('ps7',)),
        ('Speaker with Extension', ('ps8',)),
        ('Speaker Extension', ('ps9',)),
        ('Parenthetical', ('ps10', 'ps11', 'ps12', 'ps20')),
        ('Dialogue', ('ps13', 'ps14', 'ps15')),
        ('Transition', ('ps16',)),
        ('Special Term', ('ps17',)),
    )

    for element_name, codes in element_groups:
        if ps in codes:
            return element_name
    return ''


# def print_audit_report_tabular_docx(audit_df):
#     print("inside audit report")
#     #line_removed	header	left_indent_corrected	right_indent_corrected	line_wrapped_at_prescribed_right_indent	case_corrected	#blank_inserted_before	blank_inserted_after	blank_deleted_before	blank_deleted_after	space_removed_between_characters	#space_added_between_characters	line_merged_with_next_line	line_broken_into_multiple_lines	punctuation_mark_added	#punctuation_mark_removed

#     output_doc = Document()

#     style = output_doc.styles['Normal']
#     font = style.font
#     #font.name = 'Courier New'
#     font.size = Pt(8)

#     section = output_doc.sections[-1]
#     section.orientation = WD_ORIENT.LANDSCAPE

#     section.page_width = Inches(11)
#     section.left_margin = Inches(0.25)
#     section.right_margin = Inches(0.25)

#     para = output_doc.add_paragraph()
#     para.alignment = WD_ALIGN_PARAGRAPH.CENTER

#     run = para.add_run()
#     font = run.font
#     font.size = Pt(12)
#     run.text = ' Audit Report'
#     run.add_break()
#     run.add_break()

#     para = output_doc.add_paragraph()
#     para.alignment = WD_ALIGN_PARAGRAPH.LEFT
#     run = para.add_run()
#     font = run.font
#     font.size = Pt(10)
#     run.text = ' Audit Summary'

#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     font = run.font
#     font.size = Pt(9)
#     print("audit summary column is created")
#     case_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No'),:])
#     left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No'),:])
#     right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No'),:])
#     wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No'),:])


#     table =output_doc.add_table(1, cols =2)
#     table.style = 'Table Grid'

#     font.size = Pt(9)

#     heading_cells = table.rows[0].cells
#     heading_cells[0].width = Inches(2)
#     heading_cells[1].width = Inches(1)
#     heading_cells[0].text = 'Type of Change Done'
#     heading_cells[1].text = 'Count of Lines'
#     for i in range(0,2):
#         heading_cells[i].paragraphs[0].runs[0].font.bold = True

#     cells = table.add_row().cells
#     font.size = Pt(8)
#     cells[0].width = Inches(2)
#     cells[0].text = 'Case Corrected'
#     cells[1].width = Inches(0.5)
#     cells[1].text = str(case_corrected_count)


#     cells = table.add_row().cells
#     font.size = Pt(8)
#     cells[0].width = Inches(2)
#     cells[0].text = 'Left Indent Corrected'
#     cells[1].width = Inches(0.5)
#     cells[1].text = str(left_indent_corrected_count)


#     cells = table.add_row().cells
#     font.size = Pt(8)
#     cells[0].width = Inches(2)
#     cells[0].text = 'Case Corrected'
#     cells[1].width = Inches(0.5)
#     cells[1].text = str(right_indent_corrected_count)

#     cells = table.add_row().cells
#     font.size = Pt(8)
#     cells[0].width = Inches(2)
#     cells[0].text = 'Lines Wrapped at prescribed indents'
#     cells[1].width = Inches(0.5)
#     cells[1].text = str(wrapped_lines_count)


# #     run.add_break()
#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     run.add_break()
#     run.add_break()

#     font.size = Pt(8)

#     no_rows = len(audit_df.index)
#     table =output_doc.add_table(1, cols =6)
#     table.style = 'Table Grid'

#     table.autofit = False
# #     table.columns[0].width = Inches(0.5)
# #     table.columns[1].width = Inches(4)
# #     table.columns[2].width = Inches(4)
# #     table.columns[3].width = Inches(0.5)


#     heading_cells = table.rows[0].cells

#     heading_cells[0].width = Inches(0.5)
#     heading_cells[1].width = Inches(0.5)
#     heading_cells[2].width = Inches(3.5)
#     heading_cells[3].width = Inches(0.8)
#     heading_cells[4].width = Inches(3.5)
#     heading_cells[5].width = Inches(2)

#     heading_cells[0].text = 'Line No'
#     heading_cells[1].text = 'Audited Line No'
#     heading_cells[2].text = 'Current Content'
#     heading_cells[3].text = 'Script Element'
#     heading_cells[4].text = 'New Content'
#     heading_cells[5].text = 'Changes Done'

#     print("assigned heading")
#     for i in range(0,6):
#         heading_cells[i].paragraphs[0].runs[0].font.bold = True
#         heading_cells[i].paragraphs[0].runs[0].font.size = Pt(9)

#     print("assigned Index")
#     for index in audit_df.index:

#         row_index = 1

#         #line_no = audit_df['line_no'][index]

#         cells = table.add_row().cells
#         cells[0].width = Inches(0.5)
#         cells[0].text = str(index)


#         audited_line_no = audit_df['audited_line_no'][index]
#         data = str(audited_line_no)
#         cells[1].width = Inches(0.5)
#         cells[1].text = data

#         cur_data = audit_df['data'][index]
#         data =  cur_data
#         cells[2].width = Inches(3.5)
#         data = str(data)
#         cells[2].text = data

#         if audit_df['Identification_Status'][index] == 'blank':
#             script_element = 'Blank Line'
#         elif audit_df['Identification_Status'][index] == '':
#             if audit_df['introduction'][index] == 'Yes':
#                 script_element = 'Title/Introduction'
#             elif audit_df['appendix'][index] == 'Yes':
#                 script_element = 'Appendix'
#             # -----------------------------changed with mohit sir
#             else:
#                 continue
#             # -----------------------------changed with mohit sir     
#         else:
#             script_element = ps_to_script_element(audit_df['Identification_Status'][index])
#         data =  script_element
#         cells[3].width = Inches(0.8)
#         cells[3].text = data

#         new_data =  audit_df['data_corrected'][index]
#         data =  new_data
#         cells[4].width = Inches(3.5)
#         data = str(data)
#         cells[4].text = data


# #         if audit_df['line_removed'][index] == 'Yes':
# #             data = "Line was removed"
# #             run = para.add_run()
# #             run.text = data
# #             run.add_break()
# #             continue


#         sno = 1
#         changes_done = False


#         if audit_df['left_indent_corrected'][index] != 'No':
#             change_comment = audit_df['left_indent_corrected'][index]
            
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['right_indent_corrected'][index] != 'No':
#             change_comment = audit_df['right_indent_corrected'][index]
            
            
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['case_corrected'][index] != 'No':
#             change_comment = 'Case ' + str(audit_df['case_corrected'][index])
            
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_wrapped_at_prescribed_right_indent'][index] != 'No':
#             change_comment = 'Line Wrapped at Prescribed Right Indent'
            
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_broken_into_multiple_lines'][index] != 'No':
#             change_comment = 'Line Broken into Multiple Lines'
            
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_merged_with_next_line'][index] != 'No':
#             change_comment = 'Line Merged with Next Line'
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['language_specific_audit_comments'][index] != 'No':
#             change_comment = str(audit_df['language_specific_audit_comments'][index])
            
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if not changes_done:
#             data = 'No Changes Done'
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()

#         row_index += 1

#     buffer = io.BytesIO()
#     output_doc.save(buffer)
#     buffer.seek(0)
#     print("complete")
    
#     #output_doc.save(audit_report_tabular_docx)
#     return buffer


# def print_audit_report_tabular_docx(audit_df):

#     #line_removed	header	left_indent_corrected	right_indent_corrected	line_wrapped_at_prescribed_right_indent	case_corrected	#blank_inserted_before	blank_inserted_after	blank_deleted_before	blank_deleted_after	space_removed_between_characters	#space_added_between_characters	line_merged_with_next_line	line_broken_into_multiple_lines	punctuation_mark_added	#punctuation_mark_removed

#     output_doc = Document()

#     style = output_doc.styles['Normal']
#     font = style.font
#     #font.name = 'Courier New'
#     font.size = Pt(8)

#     section = output_doc.sections[-1]
#     section.orientation = WD_ORIENT.LANDSCAPE

#     section.page_width = Inches(11)
#     section.left_margin = Inches(0.25)
#     section.right_margin = Inches(0.25)

#     para = output_doc.add_paragraph()
#     para.alignment = WD_ALIGN_PARAGRAPH.CENTER

#     run = para.add_run()
#     font = run.font
#     font.size = Pt(12)
#     run.text = ' Audit Report'
#     run.add_break()
#     run.add_break()

#     para = output_doc.add_paragraph()
#     para.alignment = WD_ALIGN_PARAGRAPH.LEFT
#     run = para.add_run()
#     font = run.font
#     font.size = Pt(10)
#     run.text = ' Audit Summary'

#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     font = run.font
#     font.size = Pt(9)

#     case_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No'),:])
#     left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No'),:])
#     right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No'),:])
#     wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No'),:])


#     table =output_doc.add_table(1, cols =2)
#     table.style = 'Table Grid'

#     font.size = Pt(9)

#     heading_cells = table.rows[0].cells
#     heading_cells[0].width = Inches(2)
#     heading_cells[1].width = Inches(1)
#     heading_cells[0].text = 'Type of Change Done'
#     heading_cells[1].text = 'Count of Lines'
#     for i in range(0,2):
#         heading_cells[i].paragraphs[0].runs[0].font.bold = True

#     cells = table.add_row().cells
#     font.size = Pt(8)
#     cells[0].width = Inches(2)
#     cells[0].text = 'Case Corrected'
#     cells[1].width = Inches(0.5)
#     cells[1].text = str(case_corrected_count)


#     cells = table.add_row().cells
#     font.size = Pt(8)
#     cells[0].width = Inches(2)
#     cells[0].text = 'Left Indent Corrected'
#     cells[1].width = Inches(0.5)
#     cells[1].text = str(left_indent_corrected_count)


#     cells = table.add_row().cells
#     font.size = Pt(8)
#     cells[0].width = Inches(2)
#     cells[0].text = 'Case Corrected'
#     cells[1].width = Inches(0.5)
#     cells[1].text = str(right_indent_corrected_count)

#     cells = table.add_row().cells
#     font.size = Pt(8)
#     cells[0].width = Inches(2)
#     cells[0].text = 'Lines Wrapped at prescribed indents'
#     cells[1].width = Inches(0.5)
#     cells[1].text = str(wrapped_lines_count)


#     run.add_break()
#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     run.add_break()
#     run.add_break()

#     font.size = Pt(8)

#     no_rows = len(audit_df.index)
#     table =output_doc.add_table(1, cols =6)
#     table.alignment = WD_TABLE_ALIGNMENT.CENTER
#     table.style = 'Table Grid'

#     table.autofit = False
#     table.columns[0].width = Inches(0.5)
#     table.columns[1].width = Inches(1.2)
#     table.columns[2].width = Inches(2)
#     table.columns[3].width = Inches(1.5)
#     table.columns[4].width = Inches(2)
#     table.columns[5].width = Inches(2.5)

#     heading_cells = table.rows[0].cells

#     heading_cells[0].width = Inches(0.5)
#     heading_cells[1].width = Inches(0.5)
#     heading_cells[2].width = Inches(3.5)
#     heading_cells[3].width = Inches(0.8)
#     heading_cells[4].width = Inches(3.5)
#     heading_cells[5].width = Inches(2)

#     heading_cells[0].text = 'Line No'
#     heading_cells[1].text = 'Audited Line No'
#     heading_cells[2].text = 'Current Content'
#     heading_cells[3].text = 'Script Element'
#     heading_cells[4].text = 'New Content'
#     heading_cells[5].text = 'Changes Done'


#     for i in range(0,6):
#         heading_cells[i].paragraphs[0].runs[0].font.bold = True
#         heading_cells[i].paragraphs[0].runs[0].font.size = Pt(9)


#     for index in audit_df.index:
#         columns_to_check = ["line_removed","introduction",	"appendix",	"page_no"	,"left_indent_corrected"	,"right_indent_corrected"	,"line_wrapped_at_prescribed_right_indent",	"case_corrected",	"blank_inserted_before"	,"blank_inserted_after"	,"blank_deleted_before"	,"blank_deleted_after"	,"space_removed_between_characters"	,"space_added_between_characters"	,"line_merged_with_next_line",	"line_broken_into_multiple_lines"	,"punctuation_mark_added"	,"punctuation_mark_removed"	,"language_specific_audit_comments"]
#         audit_df[columns_to_check] = audit_df[columns_to_check].fillna('No')
#         if audit_df.loc[index, columns_to_check].eq('No').all().all():
#             continue
            
#         elif audit_df['introduction'][index] == 'Yes':
#             continue
            
#         elif audit_df['appendix'][index] == 'Yes':
#             continue
            
#         elif audit_df['Identification_Status'][index] == 'blank':
#             continue
            
#         elif pd.isna(audit_df.loc[index, "Identification_Status"]):
#             continue
#         row_index = 1

#         #line_no = audit_df['line_no'][index]

#         cells = table.add_row().cells
#         cells[0].width = Inches(0.5)
#         cells[0].text = str(index)


#         audited_line_no = audit_df['audited_line_no'][index]
#         data = str(audited_line_no)
#         cells[1].width = Inches(0.5)
#         cells[1].text = data

#         cur_data = audit_df['data'][index]
#         data =  str(cur_data).strip()
#         cells[2].width = Inches(3.5)
#         data = str(data)
#         cells[2].text = data

#         if audit_df['Identification_Status'][index] == 'blank':
#             script_element = 'Blank Line'
#         elif audit_df['Identification_Status'][index] == '':
#             if audit_df['introduction'][index] == 'Yes':
#                 script_element = 'Title/Introduction'
#             elif audit_df['appendix'][index] == 'Yes':
#                 script_element = 'Appendix'
#             # -----------------------------changed with mohit sir
#             else:
#                 continue
#             # -----------------------------changed with mohit sir
#         else:
#             script_element = ps_to_script_element(audit_df['Identification_Status'][index])
#         data =  script_element
#         cells[3].width = Inches(0.8)
#         cells[3].text = data

#         new_data =  audit_df['data_corrected'][index]
#         data =  str(new_data).strip()
#         cells[4].width = Inches(3.5)
#         data = str(data)
#         cells[4].text = data

#         sno = 1
#         changes_done = False

#         # identification_status = audit_df['Identification_Status'][index]
#         if pd.isnull(audit_df['Identification_Status'][index]) or audit_df['Identification_Status'][index] == "":
#             continue


#         if audit_df['left_indent_corrected'][index] != 'No':
#             change_comment = audit_df['left_indent_corrected'][index]
#             try:
#               str_int = change_comment[-2]+change_comment[-1]
#             except Exception as e:
#               pass
#             if  ps_to_script_element(audit_df['Identification_Status'][index]) == "Dialogue":
#                 if str_int == "15":
#                   change_comment = "Dialogue line left index corrected to 1.5 Inch"
#                 elif str_int == "25":
#                   change_comment = "Dialogue line left index corrected to 2.5 Inch"


#             if str_int == "15":
#               name =   ps_to_script_element(audit_df['Identification_Status'][index])
#               change_comment = f"{name} line left indent corrected to 1.5 Inch"
#               print(change_comment)
#             elif str_int == "25":
#               name =   ps_to_script_element(audit_df['Identification_Status'][index])
#               change_commen = f"{name} left indent corrected to 2.5 Inch"
              
#             elif str_int == "30":
#                 change_comment = "Parenthetical left indent corrected to 3 Inch"
#             elif str_int == "35":
#                 change_comment = "Speaker left indent corrected to 3.5 Inch"

#             if len(str(change_comment)) <= 2 :
#                 continue
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['right_indent_corrected'][index] != 'No':
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             change_comment = audit_df['right_indent_corrected'][index]
#             try:
#               str_int = change_comment[-2]+change_comment[-1]
#             except Exception as e:
#               pass
#             if str_int == "10":
#               change_comment = f"{name} right indent corrected to 1 Inch"

#             if len(str(change_comment)) <= 2 :
#                 continue
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['case_corrected'][index] != 'No':
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             string = str(audit_df['case_corrected'][index])
#             string = string.split()
#             content = string[-1]
#             if content == "AllUpper":
#               change_comment = f'{name} Case ' + "Corrected to All Upper"
#             elif content == "AllLower":
#               change_comment = f'{name} Case ' + "Corrected to All Lowerr"
#             if len(str(change_comment)) <= 2 :
#                 continue
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_wrapped_at_prescribed_right_indent'][index] != 'No':
#             change_comment = 'Line Wrapped at Prescribed Right Indent 1 Inch'
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             if name == "Action":
#               change_comment = f'{name}Line Wrapped at Prescribed Right Indent 1 Inch'
#             elif name == "Dialogue":
#               change_comment = f'{name}Line Wrapped at Prescribed Right Indent 2 Inch'

#             if len(str(change_comment)) <= 2 :
#                 continue
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_broken_into_multiple_lines'][index] != 'No':
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             change_comment = f'{name} line Broken into Multiple Lines'

#             if len(str(change_comment)) <= 2 :
#                 continue
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_merged_with_next_line'][index] != 'No':
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             change_comment = f'{name} line Merged with Next Line'

#             if len(str(change_comment)) <= 2 :
#                 continue
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['language_specific_audit_comments'][index] != 'No':
#             pass
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             change_comment = f"{name}",str(audit_df['language_specific_audit_comments'][index])

#             if len(str(change_comment)) <= 2 :
#                 continue
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True
        
#         if audit_df['blank_inserted_after'][index] != 'No':
#             change_comment = 'A blank line is added below'
#             data = str(sno) + '. ' + str(change_comment)
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if not changes_done:
#             continue
#             # data = 'No Changes Done'
#             # cells[5].width = Inches(2)
#             # para = cells[5].add_paragraph()
#             # run = para.add_run()
#             # run.text = data
#             # run.add_break()

#         row_index += 1

#     buffer = io.BytesIO()
#     output_doc.save(buffer)
#     buffer.seek(0)


#     # output_doc.save(audit_report_tabular_docx)
#     return buffer

# def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenumber,postauditpagenumber,preaudit_line_no,postaudit_line_no,script_language,dialogue_language):

#     #line_removed	header	left_indent_corrected	right_indent_corrected	line_wrapped_at_prescribed_right_indent	case_corrected	#blank_inserted_before	blank_inserted_after	blank_deleted_before	blank_deleted_after	space_removed_between_characters	#space_added_between_characters	line_merged_with_next_line	line_broken_into_multiple_lines	punctuation_mark_added	#punctuation_mark_removed

#     total_no_blanklines = len(audit_df[audit_df['Identification_Status'].isin(['blank'])])
# # <---------------------BLANK LINE ADD AND remove LOGIC IS HERE----------------->
#     blankline_added = len(audit_df.loc[(audit_df['line_removed'] == 'No')  & (audit_df['blank_inserted_before'] != 'No'),:] )
#     blank_add_after = len(audit_df.loc[(audit_df['line_removed'] == 'No')  & (audit_df['blank_inserted_after'] != 'No'),:] )
#     blankline_inserted = blankline_added + blank_add_after

#     blankline_rem_before = len(audit_df.loc[(audit_df['line_removed'] == 'No')  & (audit_df['blank_deleted_before'] != 'No'),:] )
#     blank_rem_after = len(audit_df.loc[(audit_df['line_removed'] == 'No')  & (audit_df['blank_deleted_after'] != 'No'),:] )
#     blankline_removed_total = blankline_rem_before + blank_rem_after

# ### <<----------------- logic for case --------------------------------->
# # for slugline
#   # case corrected
#     sluglinecase_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])), :])
#     print(sluglinecase_corrected_count)
#   # indentation corrected
#     sleft_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     sright_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     swrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     slugline_indentation = sleft_indent_corrected_count + sright_indent_corrected_count + swrapped_lines_count
#     print("sluglin_indentation:",slugline_indentation)
#   # formate corrected
#     slugline_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     slugline_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     slugline_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     slugline_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     slugline_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     slugline_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
#     slugline_formated = slugline_formate1 + slugline_formate2 + slugline_formate3 + slugline_formate4 + slugline_formate5 + slugline_formate6
#     print("slugline_formated",slugline_formated)
#   #total sluglines
#     total_no_sluglines = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])), :])
#     print(total_no_sluglines)

# # for Action lines
#   # case corrected
#     actionlinecase_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])), :])
#     print(actionlinecase_corrected_count)
#   # indentation corrected
#     actionleft_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionright_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionwrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionline_indentation = actionleft_indent_corrected_count + actionright_indent_corrected_count + actionwrapped_lines_count
#     print("actionliine_indentation:",actionline_indentation)
#   # formate corrected
#     actionline_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionline_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionline_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionline_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionline_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionline_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
#     actionline_formated = actionline_formate1 + actionline_formate2 + actionline_formate3 + actionline_formate4 + actionline_formate5 + actionline_formate6
#     print("actionline_formated",actionline_formated)
#   #total no of actionline
#     total_actionlines = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])), :])

#     print(total_actionlines)


# # for Speaker
#   # case corrected
#     speakercase_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])), :])
#     print("speakercase_corrected_count", speakercase_corrected_count)
#   # indentation corrected
#     speakerleft_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speakerright_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speaker_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speaker_indentation = speakerleft_indent_corrected_count + speakerright_indent_corrected_count + speaker_lines_count
#     print("speaker_indentation:",speaker_indentation)
#   # formate corrected
#     speaker_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speaker_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speaker_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speaker_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speaker_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speaker_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
#     speaker_formated = speaker_formate1 + speaker_formate2 + speaker_formate3 + speaker_formate4 + speaker_formate5 + speaker_formate6
#     print("speaker_formated",speaker_formated)
#   #total no of speaker -speaker
#     total_no_speaker = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])

#     print(total_no_speaker)


# # for Parenthetical -----line
#   # case corrected
#     parentheticalcase_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])), :])
#     print(parentheticalcase_corrected_count)
#   # indentation corrected
#     parenthetical_left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_line_indentation = parenthetical_left_indent_corrected_count + parenthetical_right_indent_corrected_count + parenthetical_wrapped_lines_count
#     print("parenthetical_line_indentation:",parenthetical_line_indentation)
#   # formate corrected
#     parenthetical_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No') & (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
#     parenthetical_formated = parenthetical_formate1 + parenthetical_formate2 + parenthetical_formate3 + parenthetical_formate4 + parenthetical_formate5 + parenthetical_formate6
#     print("parenthetical_formated",parenthetical_formated)
#   #total number of parenthetical
#     total_no_parenthetical = len(audit_df.loc[(audit_df['line_removed'] == 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])

#     print(total_no_parenthetical)


# # for Dialogue -----line
#   # case corrected
#     Dialogue_case_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])), :])
#     print(Dialogue_case_corrected_count)
#   # indentation corrected
#     dialogue_left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_line_indentation = dialogue_left_indent_corrected_count + dialogue_right_indent_corrected_count + dialogue_wrapped_lines_count
#     print("dialogue_line_indentation:",dialogue_line_indentation)
#   # formate corrected
#     dialogue_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
#     dialogue_formated = dialogue_formate1 + dialogue_formate2 + dialogue_formate3 + dialogue_formate4 + dialogue_formate5 + dialogue_formate6
#     print("dialogue_formated",dialogue_formated)
#   # total number of dialogue
#     total_no_dialogue = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])

#     print(total_no_dialogue)

# # for Transition lines
#   # case corrected
#     transitions_case_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps16'])), :])
#     print(transitions_case_corrected_count)
#   # indentation corrected
#     transitions_left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_line_indentation = transitions_left_indent_corrected_count + transitions_right_indent_corrected_count + transitions_wrapped_lines_count
#     print("transitions_line_indentation:",transitions_line_indentation)
#   # formate corrected
#     transitions_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
#     transitions_formated = transitions_formate1 + transitions_formate2 + transitions_formate3 + transitions_formate4 + transitions_formate5 + transitions_formate6
#     print("transitions_formated",transitions_formated)
#   #total transition
#     total_no_transition = len(audit_df.loc[audit_df['Identification_Status'].isin(['ps16']),:])

#     print(total_no_transition)


# # for Special Terms lines
#   # case corrected
#     st_case_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps17'])), :])
#     print("st_case_corrected_count",st_case_corrected_count)
#   # indentation corrected
#     st_left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_line_indentation = st_left_indent_corrected_count + st_right_indent_corrected_count + st_wrapped_lines_count
#     print("st_line_indentation:",st_line_indentation)
#   # formate corrected
#     st_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
#     st_formated = st_formate1 + st_formate2 + st_formate3 + st_formate4 + st_formate5 + st_formate6
#     print("st_formated",st_formated)
#   # total number of special terms
#     total_special_terms = len(audit_df.loc[audit_df['Identification_Status'].isin(['ps17']),:])
#     if total_special_terms < 1 :
#       total_special_terms = 1
#     print(total_special_terms)


# # write logic for the percentage
#     #a
#     difference_of_page_no = int(pre_audit_pagenumber) - int(postauditpagenumber)
#     average_of_page_no = (int(pre_audit_pagenumber) + int(postauditpagenumber)) / 2
#     final_ratio_pageno = (difference_of_page_no / average_of_page_no) * 100

#     #b
#     difference_of_line_no = int(preaudit_line_no)- int(postaudit_line_no)
#     average_of_line_no = (int(preaudit_line_no) + int(postaudit_line_no)) / 2
#     final_ratio_lineno = (difference_of_line_no / average_of_line_no) * 100

#     #c
#     try:
#         ratio_for_blanklines = ((int(blankline_inserted) + int(blankline_removed_total)) / average_of_line_no) *100
#     except:
#       ratio_for_blanklines = 0

#     #j
#     try:
#         ratio_for_sluglines = ((int(sluglinecase_corrected_count)+int(slugline_indentation)+int(slugline_formated))/total_no_sluglines)*100
#     except:
#         ratio_for_sluglines = 0
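#     #d  NOTE(review): the numerator below adds total_actionlines, whereas the sibling ratios add their *_formated count (here actionline_formated) - confirm before re-enabling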
#     try:
#         ratio_for_actionlines = ((int(actionlinecase_corrected_count)+ int(actionline_indentation)+ int(total_actionlines))/total_actionlines)*100
#     except:
#         ratio_for_actionlines = 0

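#     #e  NOTE(review): the expression below adds speaker_formated twice (speaker_indentation is never used) and divides by total_actionlines rather than total_no_speaker - confirm before re-enabling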
#     try:
#         ratio_for_Speaker = ((int(speakercase_corrected_count)+int(speaker_formated)+int(speaker_formated))/ total_actionlines)*100
#     except:
#         ratio_for_Speaker = 0


#     #f
#     try:
#         ratio_for_parenthetical = ((int(parentheticalcase_corrected_count)+int(parenthetical_line_indentation)+int(parenthetical_formated)) / total_no_parenthetical)*100
#     except:
#         ratio_for_parenthetical = 0
#     #g
#     try:
#         ratio_for_dialogues = ((int(Dialogue_case_corrected_count)+int(dialogue_line_indentation)+int(dialogue_formated)) / total_no_dialogue)*100
#     except:
#         ratio_for_dialogues = 0
#     #h
#     try:
#         ratio_for_transitions = ((int(transitions_case_corrected_count)+int(transitions_line_indentation)+int(transitions_formated)) / total_no_transition)*100
#     except:
#         ratio_for_transitions = 0

#     #i
#     try:
#         ratio_for_special_terms = ((int(st_case_corrected_count)+int(st_line_indentation)+int(st_formated))/total_special_terms) * 100
#     except:
#         ratio_for_special_terms = 0

#     average_of_c_j = (ratio_for_sluglines+ratio_for_actionlines+ratio_for_Speaker+ratio_for_parenthetical+ratio_for_dialogues+ratio_for_transitions+ratio_for_special_terms)/7
#     audit_configuration_percentage = (final_ratio_pageno+final_ratio_lineno+ratio_for_blanklines) + (average_of_c_j)
#     audit_configuration_percentage_str = f"{audit_configuration_percentage:.2f}%"
#     print("audit_configuration_percentage",audit_configuration_percentage_str)

#     total_script_element_correct = (total_no_sluglines+total_actionlines+total_no_speaker+total_no_parenthetical+total_no_dialogue+total_no_transition+total_special_terms)
#     print("total_script_element_correct",total_script_element_correct)
#     audit_script_accuracy = (total_no_sluglines+total_actionlines+total_no_speaker+total_no_parenthetical+total_no_dialogue+total_no_transition+total_special_terms+total_no_blanklines)/preaudit_line_no
#     print("audit_script_accuracy",audit_script_accuracy)
#     # audit_script_accuracy_str = min(audit_script_accuracy*100 , 100)
#     audit_script_accuracy_str = min(audit_script_accuracy*100,100)
#     audit_script_accuracy_str = f"{audit_script_accuracy_str:.2f}%"
#     print("audit_script_accuracy_str",audit_script_accuracy_str)


# # the table logics ends here
# # percentage table starts here

#     output_doc = Document()
#     style = output_doc.styles['Normal']
#     font = style.font
#     #font.name = 'Courier New'
#     font.size = Pt(10)

#     section = output_doc.sections[-1]
#     section.orientation = WD_ORIENT.LANDSCAPE

#     section.page_width = Inches(11)
#     section.left_margin = Inches(0.25)
#     section.right_margin = Inches(0.25)

#     para = output_doc.add_paragraph()
#     para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# # Audit Summary at center of the page with bold
#     run = para.add_run()
#     font = run.font
#     font.bold = True
#     font.size = Pt(14)
#     run.text = ' Audit Summary'
#     run.add_break()

#     # Add a paragraph for the left-aligned "Audit Date"
#     current_date = date.today()
#     # Convert to the "day month year" format
#     formatted_date = current_date.strftime("%d %B %Y")
#     left_aligned_text = output_doc.add_paragraph("\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\tAudit Date: " + str(formatted_date))
#     left_aligned_text.alignment = WD_ALIGN_PARAGRAPH.LEFT
#     font_audit_date = left_aligned_text.runs[0].font
#     font_audit_date.size = Pt(12)

#     para = output_doc.add_paragraph()
#     right_aligned_text = para.add_run('\t\tScriptname: ' + str(scriptname))
#     right_aligned_text.alignment = WD_ALIGN_PARAGRAPH.RIGHT
#     font_right = right_aligned_text.font
#     font_right.size = Pt(12)

#     author_para = output_doc.add_paragraph()
#     run_author = author_para.add_run("\t\tAuthor: " + str(author))
#     font_author = run_author.font
#     font_author.size = Pt(12)

#     language_script_para = output_doc.add_paragraph()
#     run_language_script = language_script_para.add_run("\t\tLanguage of Script: " + str(script_language))
#     font_language_script = run_language_script.font
#     font_language_script.size = Pt(12)

#     language_dialogue_para = output_doc.add_paragraph()
#     run_language_dialogue = language_dialogue_para.add_run("\t\tLanguage of Dialogue: " + str(dialogue_language))
#     font_language_dialogue = run_language_dialogue.font
#     font_language_dialogue.size = Pt(12)


# # for pre audit and post Audit
#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     font = run.font
#     font.size = Pt(11)

#     percent_table = output_doc.add_table(1, cols =2)
#     percent_table.alignment = WD_TABLE_ALIGNMENT.CENTER
#     percent_table.style = 'Table Grid'

#     percent_heading_cells = percent_table.rows[0].cells
#     percent_heading_cells[0].width = Inches(1.5)
#     percent_heading_cells[1].width = Inches(1)
#     percent_heading_cells[0].text = 'Audit Contribution'
#     percent_heading_cells[1].text = str(audit_configuration_percentage_str)

#     percent_heading_cells = percent_table.add_row().cells
#     font.size = Pt(12)
#     percent_heading_cells[0].width = Inches(1.5)
#     percent_heading_cells[0].text = 'Audit Script Accuracy'
#     percent_heading_cells[1].width = Inches(1)
#     percent_heading_cells[1].text = str(audit_script_accuracy_str)

#     para = output_doc.add_paragraph()

#     pre_post_table = output_doc.add_table(1, cols =3)
#     pre_post_table.alignment = WD_TABLE_ALIGNMENT.CENTER
#     pre_post_table.style = 'Table Grid'

#     preheading_cells = pre_post_table.rows[0].cells
#     preheading_cells[0].width = Inches(1.5)
#     preheading_cells[1].width = Inches(1)
#     preheading_cells[2].width = Inches(1)
#     preheading_cells[1].text = 'Pre Audit'
#     preheading_cells[2].text = 'Post Audit'

# # row No of pages
#     pcells = pre_post_table.add_row().cells
#     font.size = Pt(12)
#     pcells[0].width = Inches(1.5)
#     pcells[0].text = 'No of Pages'
#     pcells[1].width = Inches(1)
#     pcells[1].text = str(pre_audit_pagenumber)
#     pcells[2].width = Inches(1)
#     pcells[2].text = str(postauditpagenumber)
# # row no of lines
#     pcells = pre_post_table.add_row().cells
#     font.size = Pt(12)
#     pcells[0].width = Inches(1.5)
#     pcells[0].text = 'No of lines'
#     pcells[1].width = Inches(1)
#     pcells[1].text = str(preaudit_line_no)
#     pcells[2].width = Inches(1)
#     pcells[2].text = str(postaudit_line_no)

# # adding extra line after the table above
#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     font = run.font
#     font.size = Pt(12)
# #--------------------------------------


#     bl_table = output_doc.add_table(1, cols =2)
#     bl_table.alignment = WD_TABLE_ALIGNMENT.CENTER
#     bl_table.style = 'Table Grid'

#     bl_heading_cells = bl_table.rows[0].cells
#     bl_heading_cells[0].width = Inches(1.5)
#     bl_heading_cells[0].text = 'Blank Lines Added'
#     bl_heading_cells[1].width = Inches(1.5)
#     bl_heading_cells[1].text = str(blankline_inserted) # add the number here

#     blcells = bl_table.add_row().cells
#     font.size = Pt(12)
#     blcells[0].width = Inches(1.5)
#     blcells[0].text = 'Blank Lines Removed'
#     blcells[1].width = Inches(1.5)
#     blcells[1].text = str(blankline_removed_total) # add the number here

# # adding extra line after the table above

#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     font = run.font
#     font.size = Pt(12)

#     sum_table = output_doc.add_table(1, cols =4)
#     sum_table.alignment = WD_TABLE_ALIGNMENT.CENTER
#     sum_table.style = 'Table Grid'

#     sum_heading_cells = sum_table.rows[0].cells
#     sum_heading_cells[0].width = Inches(1.5)
#     sum_heading_cells[0].text = ''
#     sum_heading_cells[1].width = Inches(1.5)
#     sum_heading_cells[1].height = Inches(0.5)
#     sum_heading_cells[1].text = 'Case Correction'
#     sum_heading_cells[2].width = Inches(1.5)
#     sum_heading_cells[2].text = 'Indent Correction'
#     sum_heading_cells[3].width = Inches(1.5)
#     sum_heading_cells[3].text = 'Format Correction'


#     sum_cells = sum_table.add_row().cells
#     font.size = Pt(12)
#     sum_cells[0].width = Inches(1.5)
#     sum_cells[0].height = Inches(0.3)
#     sum_cells[0].text = 'Sluglines'
#     sum_cells[1].width = Inches(1.5)
#     sum_cells[1].height = Inches(0.3)
#     sum_cells[1].text = str(sluglinecase_corrected_count)
#     sum_cells[2].width = Inches(1.5)
#     sum_cells[2].height = Inches(0.3)
#     sum_cells[2].text = str(slugline_indentation)
#     sum_cells[3].width = Inches(1.5)
#     sum_cells[3].height = Inches(0.3)
#     sum_cells[3].text = str(slugline_formated)

#     sum_cells = sum_table.add_row().cells
#     font.size = Pt(12)
#     sum_cells[0].width = Inches(1.5)
#     sum_cells[0].text = 'Actioin Lines'
#     sum_cells[1].width = Inches(1.5)
#     sum_cells[1].text = str(actionlinecase_corrected_count)
#     sum_cells[2].width = Inches(1.5)
#     sum_cells[2].text = str(actionline_indentation)
#     sum_cells[3].width = Inches(1.5)
#     sum_cells[3].text = str(actionline_formated)

#     sum_cells = sum_table.add_row().cells
#     font.size = Pt(12)
#     sum_cells[0].width = Inches(1.5)
#     sum_cells[0].text = 'Speakers'
#     sum_cells[1].width = Inches(1.5)
#     sum_cells[1].text = str(speakercase_corrected_count)
#     sum_cells[2].width = Inches(1.5)
#     sum_cells[2].text = str(speaker_indentation)
#     sum_cells[3].width = Inches(1.5)
#     sum_cells[3].text = str(speaker_formated)

#     sum_cells = sum_table.add_row().cells
#     font.size = Pt(12)
#     sum_cells[0].width = Inches(1.5)
#     sum_cells[0].text = 'Parentheticals'
#     sum_cells[1].width = Inches(1.5)
#     sum_cells[1].text = str(parentheticalcase_corrected_count)
#     sum_cells[2].width = Inches(1.5)
#     sum_cells[2].text = str(parenthetical_line_indentation)
#     sum_cells[3].width = Inches(1.5)
#     sum_cells[3].text = str(parenthetical_formated)

#     sum_cells = sum_table.add_row().cells
#     font.size = Pt(12)
#     sum_cells[0].width = Inches(1.5)
#     sum_cells[0].text = 'Dialogues'
#     sum_cells[1].width = Inches(1.5)
#     sum_cells[1].text = str(Dialogue_case_corrected_count)
#     sum_cells[2].width = Inches(1.5)
#     sum_cells[2].text = str(dialogue_line_indentation)
#     sum_cells[3].width = Inches(1.5)
#     sum_cells[3].text = str(dialogue_formated)

#     sum_cells = sum_table.add_row().cells
#     font.size = Pt(12)
#     sum_cells[0].width = Inches(1.5)
#     sum_cells[0].text = 'Transitions'
#     sum_cells[1].width = Inches(1.5)
#     sum_cells[1].text = str(transitions_case_corrected_count)
#     sum_cells[2].width = Inches(1.5)
#     sum_cells[2].text = str(transitions_line_indentation)
#     sum_cells[3].width = Inches(1.5)
#     sum_cells[3].text = str(transitions_formated)

#     sum_cells = sum_table.add_row().cells
#     font.size = Pt(12)
#     sum_cells[0].width = Inches(1.5)
#     sum_cells[0].text = 'Special Terms'
#     sum_cells[1].width = Inches(1.5)
#     sum_cells[1].text = str(st_case_corrected_count)
#     sum_cells[2].width = Inches(1.5)
#     sum_cells[2].text = str(st_line_indentation)
#     sum_cells[3].width = Inches(1.5)
#     sum_cells[3].text = str(st_line_indentation)


#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     font = run.font
#     font.size = Pt(12)

#     para = output_doc.add_paragraph()
#     run = para.add_run()
#     run.add_break()
#     run.add_break()
# #--------------------------- 14-09-2023
#     for _ in range(5):
#         output_doc.add_paragraph()
#    #----------------------- 14-09-23
#     para = output_doc.add_paragraph()
#     para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# # Audit detail at center of the page with bold
#     run = para.add_run()
#     font = run.font
#     font.bold = True
#     font.size = Pt(14)
#     run.text = ' Audit Details'
#     run.add_break()
#   # --------------------------  14-09-23

#     no_rows = len(audit_df.index)

#     table =output_doc.add_table(1, cols =6)
#     table.alignment = WD_TABLE_ALIGNMENT.CENTER

#     table.style = 'Table Grid'

#     table.autofit = False
#     table.columns[0].width = Inches(0.5)
#     table.columns[1].width = Inches(1.2)
#     table.columns[2].width = Inches(2)
#     table.columns[3].width = Inches(1.5)
#     table.columns[4].width = Inches(2)
#     table.columns[5].width = Inches(2.5)
# #     table.columns[3].width = Inches(0.5)


#     heading_cells = table.rows[0].cells

#     heading_cells[0].width = Inches(0.1)
#     heading_cells[1].width = Inches(0.1)
#     heading_cells[2].width = Inches(3.5)
#     heading_cells[3].width = Inches(0.8)
#     heading_cells[4].width = Inches(3.5)
#     heading_cells[5].width = Inches(2)

#     heading_cells[0].text = 'Line No'
#     heading_cells[1].text = 'Audited Line No'
#     heading_cells[2].text = 'Current Content'
#     heading_cells[3].text = 'Script Element'
#     heading_cells[4].text = 'New Content'
#     heading_cells[5].text = 'Changes Done'


#     for i in range(0,6):
#         heading_cells[i].paragraphs[0].runs[0].font.bold = True
#         heading_cells[i].paragraphs[0].runs[0].font.size = Pt(9)


# #------------------------------->LOGIC HERE<---------------------------------------------
#     report_df = pd.DataFrame(columns=['line_no', 'audited_line_no', 'current_content', 'script_element', 'new_content', 'changes_done', 'para_no'])

#     for index in audit_df.index:

#         columns_to_check = ["line_removed","introduction",	"appendix",	"page_no"	,"left_indent_corrected"	,"right_indent_corrected"	,"line_wrapped_at_prescribed_right_indent",	"case_corrected",	"blank_inserted_before"	,"blank_inserted_after"	,"blank_deleted_before"	,"blank_deleted_after"	,"space_removed_between_characters"	,"space_added_between_characters"	,"line_merged_with_next_line",	"line_broken_into_multiple_lines"	,"punctuation_mark_added"	,"punctuation_mark_removed"	,"language_specific_audit_comments"]
#         audit_df[columns_to_check] = audit_df[columns_to_check].fillna('No')
#         if audit_df.loc[index, columns_to_check].eq('No').all().all():
#             # All columns contain 'No', skip this row
#             continue
#         elif audit_df['introduction'][index] == 'Yes':
#             continue
#         elif audit_df['appendix'][index] == 'Yes':
#             continue
#         elif audit_df['Identification_Status'][index] == 'blank':
#             continue
#         elif pd.isna(audit_df.loc[index, "Identification_Status"]):
#             continue


#         para_value = audit_df["para_no"][index]    # ---------------------------------------------><-------------------------
#         current_para_value = report_df['para_no'].iloc[-1] if not report_df.empty else None
#         if para_value == current_para_value:
#             continue
#         else:
#             # report_df = report_df.append(audit_df.loc[index], ignore_index=True)
#             new_row = audit_df.loc[index].to_frame().T
#             report_df = pd.concat([report_df, new_row], ignore_index=True)
#         print("current_para_value",current_para_value)


#         row_index = 1
#         old_line_no_index = index
#         collection_old_line_no = []
#         while old_line_no_index < len(audit_df) and str(audit_df["para_no"][old_line_no_index]) == str(para_value):
#             if audit_df['Identification_Status'][old_line_no_index] != "blank":
#                 try:
#                     data = int(old_line_no_index)
#                     collection_old_line_no.append(str(data))
#                 except ValueError:
#                     pass
#             old_line_no_index += 1

#         cells = table.add_row().cells
#         cells[0].width = Inches(0.1)
#         cells[0].text = ', '.join(collection_old_line_no)

#         audited_line_index = index
#         #--------------------------------------audited_lino_no------------------
#         collection_audited_line_no = []
#         while audited_line_index < len(audit_df) and str(audit_df["para_no"][audited_line_index]) == str(para_value):
#             if audit_df['Identification_Status'][audited_line_index] != "blank":
#                 audited_line_no = audit_df['audited_line_no'][audited_line_index]
#                 try:
#                     data = int(audited_line_no)
#                     collection_audited_line_no.append(str(data))
#                 except ValueError:
#                     pass
#             audited_line_index += 1
#         print("collection_audited_line_no", collection_audited_line_no)
#         data_string = ', '.join(collection_audited_line_no)
#         print("data_string:", data_string)
#         cells[1].width = Inches(0.1)
#         cells[1].text = data_string


#         #------------------------------>OLD DATA<---------------------------------
#         data_index = index
#         collection_data = []
#         while data_index < len(audit_df) and str(audit_df["para_no"][data_index]) == str(para_value):
#             cur_data = audit_df['data'][data_index]
#             if not pd.isna(cur_data):  # Check if the value is not NaN
#                 data = str(cur_data).strip()
#                 collection_data.append(data)
#             data_index += 1

#         cells[2].width = Inches(3.5)
#         data = str(data)
#         cells[2].text = '\n '.join(collection_data)

#         if audit_df['Identification_Status'][index] == 'blank':
#             script_element = 'Blank Line'
#         elif audit_df['Identification_Status'][index] == '':
#             if audit_df['introduction'][index] == 'Yes':
#                 script_element = 'Title/Introduction'
#             elif audit_df['appendix'][index] == 'Yes':
#                 script_element = 'Appendix'
#             else:
#                 continue

#         else:
#             script_element = ps_to_script_element(audit_df['Identification_Status'][index])
#         data =  script_element
#         cells[3].width = Inches(0.8)
#         cells[3].text = data

#         collection_new_data = []
#         new_data_index = index

#         while new_data_index < len(audit_df) and str(audit_df["para_no"][new_data_index]) == str(para_value):
#             if audit_df["line_removed"][new_data_index] == "No":
#                 new_data =  audit_df['data_corrected'][new_data_index]
#                 if not pd.isna(new_data):  # Check if the value is not NaN
#                     data = str(new_data).strip()
#                     collection_new_data.append(data)
#             new_data_index += 1
#         data =  str(new_data).strip()
#         cells[4].width = Inches(3.5)
#         data = str(data)
#         cells[4].text = '\n '.join(collection_new_data)

#         sno = 1
#         changes_done = False

#         # identification_status = audit_df['Identification_Status'][index]
#         if pd.isnull(audit_df['Identification_Status'][index]) or audit_df['Identification_Status'][index] == "":
#             continue


#         if audit_df['left_indent_corrected'][index] != 'No':
#             change_comment = audit_df['left_indent_corrected'][index]
#             try:
#               str_int = change_comment[-2]+change_comment[-1]
#             except Exception as e:
#               pass
#             if  ps_to_script_element(audit_df['Identification_Status'][index]) == "Dialogue":
#                 if str_int == "15":
#                   change_comment = "Dialogue line left index corrected to 1.5 Inch"
#                 elif str_int == "25":
#                   change_comment = "Dialogue line left index corrected to 2.5 Inch"


#             if str_int == "15":
#               name =   ps_to_script_element(audit_df['Identification_Status'][index])
#               change_comment = f"{name} line left indent corrected to 1.5 Inch"
#               print(change_comment)
#             elif str_int == "25":
#               name =   ps_to_script_element(audit_df['Identification_Status'][index])
#               change_commen = f"{name} left indent corrected to 2.5 Inch"

#             elif str_int == "30":
#                 change_comment = "Parenthetical left indent corrected to 3 Inch"
#             elif str_int == "35":
#                 change_comment = "Speaker left indent corrected to 3.5 Inch"

#             data = str(sno) + '. ' + str(change_comment)
#             # dataa = data.split()
#             # if dataa[-1] == "nan":
#             #   continue

#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['right_indent_corrected'][index] != 'No':
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             change_comment = audit_df['right_indent_corrected'][index]
#             try:
#               str_int = change_comment[-2]+change_comment[-1]
#             except Exception as e:
#               pass
#             if str_int == "10":
#               change_comment = f"{name} right indent corrected to 1 Inch"


#             data = str(sno) + '. ' + str(change_comment)
#             # dataa = data.split()
#             # if dataa[-1] == "nan":
#             #   continue
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['case_corrected'][index] != 'No':
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             string = str(audit_df['case_corrected'][index])
#             string = string.split()
#             content = string[-1]
#             if content == "AllUpper":
#               change_comment = f'{name} Case ' + "Corrected to All Upper"
#             elif content == "AllLower":
#               change_comment = f'{name} Case ' + "Corrected to All Lowerr"
#             if len(str(change_comment)) <= 2 :
#                 continue
#             data = str(sno) + '. ' + str(change_comment)
#             # dataa = data.split()
#             # if dataa[-1] == "nan":
#             #   continue
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_wrapped_at_prescribed_right_indent'][index] != 'No':
#             change_comment = 'Line Wrapped at Prescribed Right Indent 1 Inch'
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             if name == "Action":
#               change_comment = f'{name}Line Wrapped at Prescribed Right Indent 1 Inch'
#             elif name == "Dialogue":
#               change_comment = f'{name}Line Wrapped at Prescribed Right Indent 2 Inch'

#             data = str(sno) + '. ' + str(change_comment)
#             # dataa = data.split()
#             # if dataa[-1] == "nan":
#             #   continue

#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_broken_into_multiple_lines'][index] != 'No':
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             change_comment = f'{name} line Broken into Multiple Lines'

#             data = str(sno) + '. ' + str(change_comment)
#             # dataa = data.split()
#             # if dataa[-1] == "nan":
#             #   continue
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['line_merged_with_next_line'][index] != 'No':
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             change_comment = f'{name} line Merged with Next Line'

#             data = str(sno) + '. ' + str(change_comment)
#             # dataa = data.split()
#             # if dataa[-1] == "nan":
#             #   continue
#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if audit_df['language_specific_audit_comments'][index] != 'No':
#             pass
#             name = ps_to_script_element(audit_df['Identification_Status'][index])
#             change_comment = f"{name}",str(audit_df['language_specific_audit_comments'][index])

#             data = str(sno) + '. ' + str(change_comment)

#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True


#         if audit_df['blank_inserted_after'][index] != 'No':
#             change_comment = 'A blank line is added below'
#             # name = ps_to_script_element(audit_df['Identification_Status'][index])
#             # if name == "Action":
#             #   change_comment = f'{name}Line Wrapped at Prescribed Right Indent 1 Inch'
#             # elif name == "Dialogue":
#             #   change_comment = f'{name}Line Wrapped at Prescribed Right Indent 2 Inch'

#             data = str(sno) + '. ' + str(change_comment)
#             # dataa = data.split()
#             # if dataa[-1] == "nan":
#             #   continue

#             cells[5].width = Inches(2)
#             para = cells[5].add_paragraph()
#             run = para.add_run()
#             run.text = data
#             run.add_break()
#             sno += 1
#             changes_done = True

#         if not changes_done:
#             continue
#             # data = 'No Changes Done'
#             # cells[5].width = Inches(2)
#             # para = cells[5].add_paragraph()
#             # run = para.add_run()
#             # run.text = data
#             # run.add_break()

#         row_index += 1

#     buffer = io.BytesIO()
#     output_doc.save(buffer)
#     buffer.seek(0)


#     #output_doc.save(audit_report_tabular_docx)
#     return buffer

def count_the_line(text_file_path):
    """Return the number of lines in the text file at ``text_file_path``.

    The file is opened in text mode with the platform default encoding and
    is iterated line by line, so the whole file is never held in memory.
    A final line without a trailing newline still counts as one line; an
    empty file yields 0.
    """
    with open(text_file_path, 'r') as fp:
        # Stream the file instead of materialising every line in a list.
        return sum(1 for _ in fp)

def convert_to_pdf(input_docx, out_folder):
    """Run headless LibreOffice to convert ``input_docx`` to PDF.

    ``out_folder`` is passed to LibreOffice as ``--outdir``. The call blocks
    until the child process exits; its exit status is not inspected, so a
    failed conversion is not reported to the caller.

    NOTE(review): a function with the same name is also defined near the top
    of this module; being later in the file, this definition is the one that
    stays bound after import.
    """
    conversion_flags = ["--convert-to", "pdf"]
    command = [
        "libreoffice",
        "--headless",
        *conversion_flags,
        "--outdir",
        out_folder,
        input_docx,
    ]
    process = subprocess.Popen(command)
    # Debug trace of what is being converted (printed while the child runs).
    print([*conversion_flags, input_docx])
    process.communicate()


def countPages(docfile, pdf_file_path, base_path_directory):
    """Convert ``docfile`` to PDF and return the page count of the result.

    Args:
        docfile: document handed to ``convert_to_pdf``.
        pdf_file_path: path at which the converted PDF is expected; this is
            the file that is opened and counted.
        base_path_directory: output directory passed to ``convert_to_pdf``.

    Returns:
        The number of pages reported by ``PdfFileReader.getNumPages()``.

    NOTE(review): the module also imports ``countPages`` from
    ``utils.scripts_functions``; this definition rebinds that name.
    """
    convert_to_pdf(docfile, base_path_directory)
    print("converted to pdf")
    print("pdf_file_path",pdf_file_path)
    # Read the page count while the stream is open, then close it
    # deterministically instead of leaking the file handle.
    with open(pdf_file_path, "rb") as pdf_stream:
        return PdfFileReader(pdf_stream).getNumPages()


def convert_txt_to_docx(txt_file_path, docx_file_path):
    """Write the UTF-8 text file ``txt_file_path`` into a new .docx file.

    The complete file content is added as one paragraph of a fresh document,
    which is then saved to ``docx_file_path``.
    """
    document = docx.Document()
    with open(txt_file_path, mode='r', encoding='utf-8') as source:
        # The whole file becomes a single paragraph; no per-line splitting.
        document.add_paragraph(source.read())
    document.save(docx_file_path)
    

def csv_to_docx(csv: pd.DataFrame) -> Document:
    """Build a Courier New 12pt document from a script DataFrame.

    One paragraph is added per row of ``csv`` (note: the parameter name
    shadows the ``csv`` module inside this function). The row's
    ``script_element`` selects the paragraph indents; ``content`` becomes the
    paragraph text.

    * ``"blank"`` rows leave their paragraph empty.
    * ``"slugline"`` and ``"speaker"`` content is upper-cased.
    * ``"transition"`` paragraphs are right-aligned.
    * Float content (which is how pandas represents an empty cell, NaN) is
      replaced by an empty string *before* any string method is applied, so
      an empty slugline/speaker cell no longer raises ``AttributeError``.
    * Unrecognised elements keep the default paragraph indents.

    Returns:
        The populated python-docx document (not saved to disk here).
    """
    # (left, right) paragraph indents in inches for each known element.
    indents_in_inches = {
        "slugline": (0, 0),
        "action": (0, 0),
        "dialogue": (1.0, 1.25),
        "parenthetical": (1.5, 2.25),
        "speaker": (2, 1),
        "transition": (2.5, 0),
        "special_term": (0, 0),
    }
    uppercase_elements = ("slugline", "speaker")

    output_doc = Document()
    style = output_doc.styles["Normal"]
    font = style.font
    font.name = "Courier New"
    font.size = Pt(12)
    section = output_doc.sections[0]
    section.page_height = Mm(297)
    # NOTE(review): A4 is 210 mm (about 8.27 in) wide; 8.57 is kept as-is,
    # confirm whether the extra width is intentional.
    a4_right = 8.57
    section.page_width = Inches(a4_right)
    section.left_margin = Inches(1.5)

    for index in csv.index:
        para = output_doc.add_paragraph()

        paragraph_format = para.paragraph_format
        paragraph_format.space_before = Pt(0)
        paragraph_format.space_after = Pt(0)
        paragraph_format.line_spacing = Pt(12)

        script_element = csv["script_element"][index]
        content = csv["content"][index]

        if script_element == "blank":
            # The empty paragraph added above acts as the blank line.
            continue

        # Normalise NaN/float cells first so the upper-casing below is safe.
        if isinstance(content, float):
            content = ""

        if script_element in indents_in_inches:
            left, right = indents_in_inches[script_element]
            paragraph_format.left_indent = Inches(left)
            paragraph_format.right_indent = Inches(right)

        if script_element == "transition":
            para.alignment = WD_ALIGN_PARAGRAPH.RIGHT

        if script_element in uppercase_elements:
            content = content.upper()

        para.text = content

    return output_doc

def language_detector_for_csv(orginal_csv_path):
    """Detect the language of every action and dialogue row of a script.

    Args:
        orginal_csv_path: either an already loaded ``pandas.DataFrame`` or
            anything ``pandas.read_csv`` accepts (path, buffer, ...). The
            data must provide ``script_element`` and ``content`` columns.

    Returns:
        A tuple ``(actionline_lang, dialogue_lang)``. Each is a list with one
        single-element list per matching row, holding
        ``language_code[language_detector(content)]`` for that row.

    Raises:
        Whatever ``pandas.read_csv`` raises for an unreadable source (for
        example ``FileNotFoundError``). Previously a bare ``except`` hid that
        error and the function failed later with an unrelated
        ``AttributeError``.
    """
    if isinstance(orginal_csv_path, pd.DataFrame):
        audit_df = orginal_csv_path
    else:
        audit_df = pd.read_csv(orginal_csv_path)

    actionline_lang = []
    dialogue_lang = []

    for script_element, content in zip(audit_df["script_element"], audit_df["content"]):
        if script_element == "action":
            bucket = actionline_lang
        elif script_element == "dialogue":
            bucket = dialogue_lang
        else:
            continue
        src_lang = language_detector(content)
        # Each entry stays wrapped in its own list, as callers receive today.
        bucket.append([language_code[src_lang]])

    return actionline_lang, dialogue_lang

def assign_para_no(df):
    """Number the paragraphs of an audit frame from ``Identification_Status``.

    A ``para_no`` column (initialised to 0) is added to ``df`` in place and
    filled while walking the rows in index order. A running counter starts at
    1; depending on the row's status code the row either opens a new
    paragraph (optionally pulling some following rows into it), is left for a
    preceding row to claim, or is ignored. Rows that no rule claims keep 0.
    Later rows may overwrite a number given to them by an earlier row's
    look-ahead, exactly as before.

    Neighbour lookups (``index - 1``, ``index + 1`` and the look-ahead scans)
    are label based, i.e. the frame is assumed to carry the default
    0..n-1 integer index. A neighbour that does not exist is treated as
    matching no status, so the first and last rows no longer raise
    ``KeyError``.

    Returns:
        A frame with the same rows where ``para_no`` has been moved to
        column position 3 (or the end when there are fewer columns).
    """
    para_no = 1
    df['para_no'] = 0
    statuses = df['Identification_Status']

    def status_at(label):
        # Out-of-range neighbours yield None, which equals no status code.
        if label in df.index:
            return statuses.at[label]
        return None

    def claim_following(start, members, number):
        # Give `number` to consecutive rows from `start` whose status is in
        # `members`; stops at the first non-member or at the end of the frame.
        spot = start
        while status_at(spot) in members:
            df.at[spot, 'para_no'] = number
            spot += 1

    for index in df.index:
        line_pos = statuses.at[index]

        if line_pos == 'blank':
            continue

        # Codes that always open a paragraph of their own.
        if line_pos in ('ps1', 'ps8', 'ps10', 'ps16', 'ps17'):
            df.at[index, 'para_no'] = para_no
            para_no += 1
            continue

        if line_pos == 'ps2':
            df.at[index, 'para_no'] = para_no
            # A directly following ps3 shares the paragraph.
            if status_at(index + 1) == 'ps3':
                df.at[index + 1, 'para_no'] = para_no
            para_no += 1
            continue

        if line_pos == 'ps4':
            df.at[index, 'para_no'] = para_no
            claim_following(index + 1, ('ps5', 'ps6', 'ps4'), para_no)
            para_no += 1
            continue

        if line_pos == 'ps13':
            df.at[index, 'para_no'] = para_no
            spot_index = index + 1
            while status_at(spot_index) in ('ps14', 'ps15', 'ps13', 'blank'):
                if status_at(spot_index) == 'blank' and status_at(spot_index + 1) == 'ps14':
                    # Blank directly before a ps14: the blank keeps its
                    # number, the ps14 is claimed and scanning resumes there.
                    df.at[spot_index + 1, 'para_no'] = para_no
                    spot_index += 1
                df.at[spot_index, 'para_no'] = para_no
                spot_index += 1
            para_no += 1
            continue

        if line_pos == 'ps6':
            # Already claimed by the ps4 scan when it follows ps5/ps4.
            if status_at(index - 1) not in ('ps5', 'ps4'):
                df.at[index, 'para_no'] = para_no
                para_no += 1
            continue

        if line_pos == 'ps7':
            df.at[index, 'para_no'] = para_no
            claim_following(index + 1, ('ps8', 'ps9'), para_no)
            para_no += 1
            continue

        if line_pos == 'ps15':
            if status_at(index - 1) in ('ps7', 'ps12', 'ps10', 'ps20', 'ps8', 'blank'):
                df.at[index, 'para_no'] = para_no
                para_no += 1
            continue

        if line_pos == 'ps14':
            if status_at(index - 1) in ('ps8', 'ps7'):
                df.at[index, 'para_no'] = para_no
                claim_following(index + 1, ('ps15',), para_no)
                para_no += 1
            continue

        if line_pos == 'ps11':
            df.at[index, 'para_no'] = para_no
            claim_following(index + 1, ('ps12', 'ps20'), para_no)
            para_no += 1
            continue

        if line_pos == 'ps12':
            # Never opens a paragraph itself; only ps11/ps20 rows claim it.
            continue

        if line_pos == 'ps20':
            if status_at(index - 1) == 'ps11':
                continue
            if status_at(index + 1) == 'ps12':
                df.at[index, 'para_no'] = para_no
                df.at[index + 1, 'para_no'] = para_no
                para_no += 1
                continue
            # NOTE(review): the counter advances here without numbering this
            # row. Kept as in the original; confirm whether the row should
            # receive para_no as well.
            para_no += 1
            continue

    columns = list(df.columns)
    columns.insert(3, columns.pop(columns.index('para_no')))
    df = df[columns]
    return df


def print_audit_report_tabular_docx(audit_df,scriptname,author,pre_audit_pagenumber,postauditpagenumber,preaudit_line_no,postaudit_line_no,script_language,dialogue_language):

    #line_removed	header	left_indent_corrected	right_indent_corrected	line_wrapped_at_prescribed_right_indent	case_corrected	#blank_inserted_before	blank_inserted_after	blank_deleted_before	blank_deleted_after	space_removed_between_characters	#space_added_between_characters	line_merged_with_next_line	line_broken_into_multiple_lines	punctuation_mark_added	#punctuation_mark_removed

    total_no_blanklines = len(audit_df[audit_df['Identification_Status'].isin(['blank'])])
# <---------------------BLANK LINE ADD AND remove LOGIC IS HERE----------------->
    blankline_added = len(audit_df.loc[(audit_df['line_removed'] == 'No')  & (audit_df['blank_inserted_before'] != 'No'),:] )
    blank_add_after = len(audit_df.loc[(audit_df['line_removed'] == 'No')  & (audit_df['blank_inserted_after'] != 'No'),:] )
    blankline_inserted = blankline_added + blank_add_after

    blankline_rem_before = len(audit_df.loc[(audit_df['line_removed'] == 'No')  & (audit_df['blank_deleted_before'] != 'No'),:] )
    blank_rem_after = len(audit_df.loc[(audit_df['line_removed'] == 'No')  & (audit_df['blank_deleted_after'] != 'No'),:] )
    blankline_removed_total = blankline_rem_before + blank_rem_after

### <<----------------- logic for case --------------------------------->
# for slugline
  # case corrected
    sluglinecase_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])), :])
    print(sluglinecase_corrected_count)
  # indentation corrected
    sleft_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    sright_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    swrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    slugline_indentation = sleft_indent_corrected_count + sright_indent_corrected_count + swrapped_lines_count
    print("sluglin_indentation:",slugline_indentation)
  # format corrected
    slugline_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    slugline_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    slugline_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    slugline_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    slugline_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    slugline_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])),:])
    slugline_formated = slugline_formate1 + slugline_formate2 + slugline_formate3 + slugline_formate4 + slugline_formate5 + slugline_formate6
    print("slugline_formated",slugline_formated)
  #total sluglines
    total_no_sluglines = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps1', 'ps2', 'ps3'])), :])
    print(total_no_sluglines)

# for action lines
  # case corrected
    actionlinecase_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])), :])
    print(actionlinecase_corrected_count)
  # indentation corrected
    actionleft_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionright_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionwrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionline_indentation = actionleft_indent_corrected_count + actionright_indent_corrected_count + actionwrapped_lines_count
    print("actionliine_indentation:",actionline_indentation)
  # format corrected
    actionline_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionline_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionline_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionline_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionline_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionline_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])),:])
    actionline_formated = actionline_formate1 + actionline_formate2 + actionline_formate3 + actionline_formate4 + actionline_formate5 + actionline_formate6
    print("actionline_formated",actionline_formated)
  #total no of actionline
    total_actionlines = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps4', 'ps5', 'ps6'])), :])

    print(total_actionlines)


# for Speaker
  # case corrected
    speakercase_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])), :])
    print("speakercase_corrected_count", speakercase_corrected_count)
  # indentation corrected
    speakerleft_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speakerright_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speaker_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speaker_indentation = speakerleft_indent_corrected_count + speakerright_indent_corrected_count + speaker_lines_count
    print("speaker_indentation:",speaker_indentation)
  # format corrected
    speaker_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speaker_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speaker_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speaker_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speaker_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speaker_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])
    speaker_formated = speaker_formate1 + speaker_formate2 + speaker_formate3 + speaker_formate4 + speaker_formate5 + speaker_formate6
    print("speaker_formated",speaker_formated)
  #total no of speaker -speaker
    total_no_speaker = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps7', 'ps8', 'ps9'])),:])

    print(total_no_speaker)


# for Parenthetical -----line
  # case corrected
    parentheticalcase_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])), :])
    print(parentheticalcase_corrected_count)
  # indentation corrected
    parenthetical_left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_line_indentation = parenthetical_left_indent_corrected_count + parenthetical_right_indent_corrected_count + parenthetical_wrapped_lines_count
    print("parenthetical_line_indentation:",parenthetical_line_indentation)
  # format corrected
    parenthetical_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No') & (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])
    parenthetical_formated = parenthetical_formate1 + parenthetical_formate2 + parenthetical_formate3 + parenthetical_formate4 + parenthetical_formate5 + parenthetical_formate6
    print("parenthetical_formated",parenthetical_formated)
  #total number of parenthetical
    total_no_parenthetical = len(audit_df.loc[(audit_df['line_removed'] == 'No')& (audit_df['Identification_Status'].isin(['ps10', 'ps11', 'ps12', 'ps20'])),:])

    print(total_no_parenthetical)


# for Dialogue -----line
  # case corrected
    Dialogue_case_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])), :])
    print(Dialogue_case_corrected_count)
  # indentation corrected
    dialogue_left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_line_indentation = dialogue_left_indent_corrected_count + dialogue_right_indent_corrected_count + dialogue_wrapped_lines_count
    print("dialogue_line_indentation:",dialogue_line_indentation)
  # format corrected
    dialogue_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])
    dialogue_formated = dialogue_formate1 + dialogue_formate2 + dialogue_formate3 + dialogue_formate4 + dialogue_formate5 + dialogue_formate6
    print("dialogue_formated",dialogue_formated)
  # total number of dialogue
    total_no_dialogue = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['Identification_Status'].isin(['ps13', 'ps14', 'ps15'])),:])

    print(total_no_dialogue)

# for transition lines
  # case corrected
    transitions_case_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps16'])), :])
    print(transitions_case_corrected_count)
  # indentation corrected
    transitions_left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_line_indentation = transitions_left_indent_corrected_count + transitions_right_indent_corrected_count + transitions_wrapped_lines_count
    print("transitions_line_indentation:",transitions_line_indentation)
  # format corrected
    transitions_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps16'])),:])
    transitions_formated = transitions_formate1 + transitions_formate2 + transitions_formate3 + transitions_formate4 + transitions_formate5 + transitions_formate6
    print("transitions_formated",transitions_formated)
  #total transition
    total_no_transition = len(audit_df.loc[audit_df['Identification_Status'].isin(['ps16']),:])

    print(total_no_transition)


# for special-term lines
  # case corrected
    st_case_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['case_corrected'] != 'No') & (audit_df['Identification_Status'].isin(['ps17'])), :])
    print("st_case_corrected_count",st_case_corrected_count)
  # indentation corrected
    st_left_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['left_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_right_indent_corrected_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['right_indent_corrected'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_wrapped_lines_count = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_wrapped_at_prescribed_right_indent'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_line_indentation = st_left_indent_corrected_count + st_right_indent_corrected_count + st_wrapped_lines_count
    print("st_line_indentation:",st_line_indentation)
  # format corrected
    st_formate1 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_removed_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_formate2 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['space_added_between_characters'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_formate3 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_merged_with_next_line'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_formate4 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['line_broken_into_multiple_lines'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_formate5 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_added'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_formate6 = len(audit_df.loc[(audit_df['line_removed'] == 'No') & (audit_df['punctuation_mark_removed'] != 'No')& (audit_df['Identification_Status'].isin(['ps17'])),:])
    st_formated = st_formate1 + st_formate2 + st_formate3 + st_formate4 + st_formate5 + st_formate6
    print("st_formated",st_formated)
  # total number of special terms
    total_special_terms = len(audit_df.loc[audit_df['Identification_Status'].isin(['ps17']),:])
    if total_special_terms < 1 :
      total_special_terms = 1
    print(total_special_terms)


# write logic for the percentage
    #a
    difference_of_page_no = int(pre_audit_pagenumber) - int(postauditpagenumber)
    average_of_page_no = (int(pre_audit_pagenumber) + int(postauditpagenumber)) / 2
    final_ratio_pageno = (difference_of_page_no / average_of_page_no) * 100

    #b
    difference_of_line_no = int(preaudit_line_no)- int(postaudit_line_no)
    average_of_line_no = (int(preaudit_line_no) + int(postaudit_line_no)) / 2
    final_ratio_lineno = (difference_of_line_no / average_of_line_no) * 100

    #c
    try:
        ratio_for_blanklines = ((int(blankline_inserted) + int(blankline_removed_total)) / average_of_line_no) *100
    except:
      ratio_for_blanklines = 0

    #j
    try:
        ratio_for_sluglines = ((int(sluglinecase_corrected_count)+int(slugline_indentation)+int(slugline_formated))/total_no_sluglines)*100
    except:
        ratio_for_sluglines = 0
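    #d action lines -- NOTE(review): the third term is total_actionlines, whereas every other ratio adds its *_formated count; confirm this is intended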
    try:
        ratio_for_actionlines = ((int(actionlinecase_corrected_count)+ int(actionline_indentation)+ int(total_actionlines))/total_actionlines)*100
    except:
        ratio_for_actionlines = 0

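    #e speaker lines -- NOTE(review): speaker_formated is added twice, speaker_indentation is not included, and the divisor is total_actionlines rather than total_no_speaker; confirm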
    try:
        ratio_for_Speaker = ((int(speakercase_corrected_count)+int(speaker_formated)+int(speaker_formated))/ total_actionlines)*100
    except:
        ratio_for_Speaker = 0


    #f
    try:
        ratio_for_parenthetical = ((int(parentheticalcase_corrected_count)+int(parenthetical_line_indentation)+int(parenthetical_formated)) / total_no_parenthetical)*100
    except:
        ratio_for_parenthetical = 0
    #g
    try:
        ratio_for_dialogues = ((int(Dialogue_case_corrected_count)+int(dialogue_line_indentation)+int(dialogue_formated)) / total_no_dialogue)*100
    except:
        ratio_for_dialogues = 0
    #h
    try:
        ratio_for_transitions = ((int(transitions_case_corrected_count)+int(transitions_line_indentation)+int(transitions_formated)) / total_no_transition)*100
    except:
        ratio_for_transitions = 0

    #i
    try:
        ratio_for_special_terms = ((int(st_case_corrected_count)+int(st_line_indentation)+int(st_formated))/total_special_terms) * 100
    except:
        ratio_for_special_terms = 0

    average_of_c_j = (ratio_for_sluglines+ratio_for_actionlines+ratio_for_Speaker+ratio_for_parenthetical+ratio_for_dialogues+ratio_for_transitions+ratio_for_special_terms)/7
    audit_configuration_percentage = (final_ratio_pageno+final_ratio_lineno+ratio_for_blanklines) + (average_of_c_j)
    audit_configuration_percentage_str = f"{audit_configuration_percentage:.2f}%"
    print("audit_configuration_percentage",audit_configuration_percentage_str)

    total_script_element_correct = (total_no_sluglines+total_actionlines+total_no_speaker+total_no_parenthetical+total_no_dialogue+total_no_transition+total_special_terms)
    print("total_script_element_correct",total_script_element_correct)
    audit_script_accuracy = (total_no_sluglines+total_actionlines+total_no_speaker+total_no_parenthetical+total_no_dialogue+total_no_transition+total_special_terms+total_no_blanklines)/preaudit_line_no
    print("audit_script_accuracy",audit_script_accuracy)
    # audit_script_accuracy_str = min(audit_script_accuracy*100 , 100)
    audit_script_accuracy_str = audit_script_accuracy*100
    audit_script_accuracy_str = f"{audit_script_accuracy_str:.2f}%"
    print("audit_script_accuracy_str",audit_script_accuracy_str)


# the table logics ends here
# percentage table starts here

    output_doc = Document()
    style = output_doc.styles['Normal']
    font = style.font
    #font.name = 'Courier New'
    font.size = Pt(10)

    section = output_doc.sections[-1]
    section.orientation = WD_ORIENT.LANDSCAPE

    section.page_width = Inches(11)
    section.left_margin = Inches(0.25)
    section.right_margin = Inches(0.25)

    para = output_doc.add_paragraph()
    para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Audit Summary at center of the page with bold
    run = para.add_run()
    font = run.font
    font.bold = True
    font.size = Pt(18)
    run.text = ' Audit Summary'
    para = output_doc.add_paragraph()
    # run.add_break()

    # Add a paragraph for the left-aligned "Audit Date"
    current_date = date.today()
    # Convert to the "day month year" format
    formatted_date = current_date.strftime("%d %B %Y")
    string_date = "𝐀𝐮𝐝𝐢𝐭 𝐃𝐚𝐭𝐞"
    left_aligned_text = output_doc.add_paragraph("\t\t\t\t\t\t\t\t\t\t\t\t\t\t"+ string_date+ " : " + str(formatted_date))
    left_aligned_text.alignment = WD_ALIGN_PARAGRAPH.LEFT
    font_audit_date = left_aligned_text.runs[0].font
    font_audit_date.size = Pt(14)
    font.bold = True
    para = output_doc.add_paragraph()

    table = output_doc.add_table(rows=2, cols=2)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table.style = 'Colorful Shading Accent 6'
    table.autofit = False  # Turn off autofit to set cell widths explicitly

    # Set cell widths (you can adjust these values as needed)
    table.columns[0].width = Pt(150)
    table.columns[1].width = Pt(100)
    # Access the first cell in the first row
    cell = table.cell(0, 0)
    cell.text = "Audit Contributions"
    cell1 = table.cell(0, 1)
    cell1.text = audit_configuration_percentage_str
    for paragraph in cell.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
    for paragraph in cell1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            
    # Center the label cell vertically
    cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER
    # Access the first cell in the second row
    cell = table.cell(1, 0)
    cell.text = "Audited Accuracy"
    cell1 = table.cell(1,1)
    cell1.text = audit_script_accuracy_str
    for paragraph in cell.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
    for paragraph in cell1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    # Center the label cell vertically
    cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER

    para = output_doc.add_paragraph()
    para = output_doc.add_paragraph()
    para = output_doc.add_paragraph()
    
    right_aligned_text = para.add_run('\t\t𝐒𝐜𝐫𝐢𝐩𝐭 𝐍𝐚𝐦𝐞: ' + str(scriptname))
    right_aligned_text.alignment = WD_ALIGN_PARAGRAPH.RIGHT
    font_right = right_aligned_text.font
    font_right.size = Pt(14)

    author_para = output_doc.add_paragraph()
    run_author = author_para.add_run("\t\t𝐀𝐮��𝐡𝐨𝐫: " + str(author))
    font_author = run_author.font
    font_author.size = Pt(14)

    language_script_para = output_doc.add_paragraph()
    run_language_script = language_script_para.add_run("\t\t𝐋𝐚𝐧𝐠𝐮𝐚𝐠𝐞 𝐨𝐟 𝐒𝐜𝐫𝐢𝐩𝐭: " + str(script_language))
    font_language_script = run_language_script.font
    font_language_script.size = Pt(14)

    language_dialogue_para = output_doc.add_paragraph()
    run_language_dialogue = language_dialogue_para.add_run("\t\t𝐋𝐚𝐧𝐠𝐮𝐚𝐠𝐞 𝐨𝐟 𝐃𝐢𝐚𝐥𝐨𝐠𝐮𝐞: " + str(dialogue_language))
    font_language_dialogue = run_language_dialogue.font
    font_language_dialogue.size = Pt(14)
    # Set 1pt spacing before and after every paragraph added so far
    for para in output_doc.paragraphs:
        para.paragraph_format.space_before = Pt(1)
        para.paragraph_format.space_after = Pt(1)

    para = output_doc.add_paragraph()
    para = output_doc.add_paragraph()

    # changes_string_line = output_doc.add_paragraph()
    # run_changes_string_line = changes_string_line.add_run("\t\tStructural Changes\t\t\t\t\\t Blank Lines Adjustments ")
    # font_changes_string_line = run_changes_string_line.font
    # # Set font properties
    # font_changes_string_line.color.rgb = WD_COLOR_INDEX.RED  # Red font color
    # font_changes_string_line.italic = True  # Italic style
    # font_changes_string_line.bold = True
    paragraph = output_doc.add_paragraph()
    paragraph = output_doc.add_paragraph()
    run = paragraph.add_run("\t\t\t\tStructural Changes\t\t\t\t\t\tBlank Lines Adjustments ")
    # Set font size
    font = run.font
    font.size = Pt(14)
    # Set font color to red
    font.color.rgb = RGBColor(255, 0, 0)
    font.bold = True
    font.italic = True
    for para in output_doc.paragraphs:
        para.paragraph_format.space_before = Pt(0)
        para.paragraph_format.space_after = Pt(0)
    
    
    table  = output_doc.add_table(rows=1, cols=2)
    table.allow_autofit = False
    table.alignment = WD_TABLE_ALIGNMENT.CENTER
    table._cells[0].width = Inches(4.3)
    table._cells[1].width = Inches(4.3) 
    

    column_first = table._cells[0].add_table(rows=3, cols=3)
    column_second = table._cells[1].add_table(rows=2, cols=2)
    column_first.style = 'Colorful Shading Accent 6' 
    column_second.style = 'Colorful Shading Accent 6' 

    column_first_row1 = column_first.cell(0,1)
    column_first_row1.text ="Pre Audit"
    column_first_row1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_first_row1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14) 
    
    column_first_row1_c2 = column_first.cell(0,2)
    column_first_row1_c2.text ="Post Audit"
    column_first_row1_c2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_first_row1_c2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14) 

    column_first_row2_c1 = column_first.cell(1,0)
    column_first_row2_c1.text ="No of Pages"
    column_first_row2_c1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_first_row2_c1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)

    column_first_row2_c2 = column_first.cell(1,1)
    column_first_row2_c2.text =   str(pre_audit_pagenumber)
    column_first_row2_c2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_first_row2_c2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    column_first_row2_c3 = column_first.cell(1,2)
    column_first_row2_c3.text =   str(postauditpagenumber)
    column_first_row2_c3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_first_row2_c3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    column_first_row3_c1 = column_first.cell(2,0)
    column_first_row3_c1.text =   "No of Lines"
    column_first_row3_c1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_first_row3_c1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)

    column_first_row3_c2 = column_first.cell(2,1)
    column_first_row3_c2.text =   str(preaudit_line_no)
    column_first_row3_c2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_first_row3_c2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    column_first_row3_c3 = column_first.cell(2,2)
    column_first_row3_c3.text =  str(postaudit_line_no)
    column_first_row3_c3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_first_row3_c3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    column_second_row1_c1 = column_second.cell(0,0)
    column_second_row1_c1.text =  "Blank Lines Added"
    column_second_row1_c1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_second_row1_c1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)

    column_second_row1_c2 = column_second.cell(0,1)
    column_second_row1_c2.text =  str(blankline_inserted)
    column_second_row1_c2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_second_row1_c2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    column_second_row2_c1 = column_second.cell(1,0)
    column_second_row2_c1.text =  "Blank Lines Removed"
    column_second_row2_c1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_second_row2_c1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)

    column_second_row2_c2 = column_second.cell(1,1)
    column_second_row2_c2.text =  str(blankline_removed_total)
    column_second_row2_c2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in column_second_row2_c2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
          
    paragraph = output_doc.add_paragraph()
    paragraph = output_doc.add_paragraph()
    paragraph = output_doc.add_paragraph()
    run = paragraph.add_run("Summary of Correction made")
    # Set font size
    font = run.font
    font.size = Pt(14)
    # Set font color to red
    font.color.rgb = RGBColor(255, 0, 0)
    font.bold = True
    font.italic = True
    paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table  = output_doc.add_table(rows=9, cols=5)
    summary_table.allow_autofit = False
    summary_table.alignment = WD_TABLE_ALIGNMENT.CENTER
    summary_table.style = 'Colorful Shading Accent 6'
    # # Calculate the column widths
    # column_widths = [Inches(1), Inches(1.5), Inches(1.5), Inches(1.5), Inches(1)]  # Adjust the widths as needed

    # # Set the column widths
    # for col, width in enumerate(column_widths):
    #     summary_table.columns[col].width = width
    # table.columns[0].width = Pt(150)
    # table.columns[1].width = Pt(100)
    # summary_table.columns[0].width = Inches(1)
    # summary_table.columns[1].width = Inches(1) 
    # summary_table.columns[2].width = Inches(1) 
    # summary_table.columns[3].width = Inches(1) 
    # summary_table.columns[4].width = Inches(0.5) 

    summary_table_row1_col_2 = summary_table.cell(0,1)
    summary_table_row1_col_2.text ="Case Correction"
    summary_table_row1_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row1_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14) 
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row1_col_3 = summary_table.cell(0,2)
    summary_table_row1_col_3.text ="Indent Correction"
    summary_table_row1_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row1_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14) 
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row1_col_4 = summary_table.cell(0,3)
    summary_table_row1_col_4.text ="Format Correction"
    summary_table_row1_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row1_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER 
    
    summary_table_row1_col_5 = summary_table.cell(0,4)
    summary_table_row1_col_5.text ="Total"
    summary_table_row1_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    summary_table_row1_col_5.width = Inches(0.5)

    for paragraph in summary_table_row1_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14) 
            run.font.bold = True
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

          #---------ROW 2------------
    summary_table_row2_col_1 = summary_table.cell(1,0)
    summary_table_row2_col_1.text ="Sluglines"
    summary_table_row2_col_1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row2_col_1.paragraphs:
      for run in paragraph.runs:
          run.font.size = Pt(14)

    summary_table_row2_col_2 = summary_table.cell(1,1)
    summary_table_row2_col_2.text = str(sluglinecase_corrected_count)
    summary_table_row2_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row2_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row2_col_3 = summary_table.cell(1,2)
    summary_table_row2_col_3.text = str(slugline_indentation)
    summary_table_row2_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row2_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
  
    summary_table_row2_col_4 = summary_table.cell(1,3)
    summary_table_row2_col_4.text = str(slugline_formated)
    summary_table_row2_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row2_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            
    summary_table_row2_col_5 = summary_table.cell(1,4)
    total_slug = slugline_formated+slugline_indentation+sluglinecase_corrected_count
    summary_table_row2_col_5.text = str(total_slug)
    summary_table_row2_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row2_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER


    summary_table_row3_col_1 = summary_table.cell(2,0)
    summary_table_row3_col_1.text = "Action Lines"
    summary_table_row3_col_1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row3_col_1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            
            
    summary_table_row3_col_2 = summary_table.cell(2,1)
    summary_table_row3_col_2.text = str(actionlinecase_corrected_count)
    summary_table_row3_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row3_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            
    summary_table_row3_col_3 = summary_table.cell(2,2)
    summary_table_row3_col_3.text = str(actionline_indentation)
    summary_table_row3_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row3_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            
    summary_table_row3_col_4 = summary_table.cell(2,3)
    summary_table_row3_col_4.text = str(actionline_formated)
    summary_table_row3_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row3_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
            
    summary_table_row3_col_5 = summary_table.cell(2,4)
    total_action_line = actionlinecase_corrected_count+actionline_indentation+actionline_formated
    summary_table_row3_col_5.text = str(total_action_line)
    summary_table_row3_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row3_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

          #-----ROW 4 ---------

    summary_table_row4_col_1 = summary_table.cell(3,0)
    summary_table_row4_col_1.text = "Speaker"
    summary_table_row4_col_1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row4_col_1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            # paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row4_col_2 = summary_table.cell(3,1)
    summary_table_row4_col_2.text = str(speakercase_corrected_count)
    summary_table_row4_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row4_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row4_col_3 = summary_table.cell(3,2)
    summary_table_row4_col_3.text = str(speaker_indentation)
    summary_table_row4_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row4_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row4_col_4 = summary_table.cell(3,3)
    summary_table_row4_col_4.text = str(speaker_formated)
    summary_table_row4_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row4_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row4_col_5 = summary_table.cell(3,4)
    total_speaker = speaker_formated+speaker_indentation+speakercase_corrected_count
    summary_table_row4_col_5.text = str(total_speaker)
    summary_table_row4_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row4_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    

  # ----ROW 5 -------
    summary_table_row5_col_1 = summary_table.cell(4,0)
    summary_table_row5_col_1.text = "Parentheticals"
    summary_table_row5_col_1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row5_col_1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)


    summary_table_row5_col_2 = summary_table.cell(4,1)
    summary_table_row5_col_2.text = str(parentheticalcase_corrected_count)
    summary_table_row5_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row5_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row5_col_3 = summary_table.cell(4,2)
    summary_table_row5_col_3.text = str(parenthetical_line_indentation)
    summary_table_row5_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row5_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row5_col_4 = summary_table.cell(4,3)
    summary_table_row5_col_4.text = str(parenthetical_formated)
    summary_table_row5_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row5_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row5_col_5 = summary_table.cell(4,4)
    total_parenthetical = parenthetical_formated + parenthetical_line_indentation+parentheticalcase_corrected_count
    summary_table_row5_col_5.text = str(total_parenthetical)
    summary_table_row5_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row5_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER


          #ROW --- 6
    summary_table_row6_col_1 = summary_table.cell(5,0)
    summary_table_row6_col_1.text = "Dialogue"
    summary_table_row6_col_1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row6_col_1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)

    summary_table_row6_col_2 = summary_table.cell(5,1)
    summary_table_row6_col_2.text = str(Dialogue_case_corrected_count)
    summary_table_row6_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row6_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row6_col_3 = summary_table.cell(5,2)
    summary_table_row6_col_3.text = str(dialogue_line_indentation)
    summary_table_row6_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row6_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row6_col_4 = summary_table.cell(5,3)
    summary_table_row6_col_4.text = str(dialogue_formated)
    summary_table_row6_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row6_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row6_col_5 = summary_table.cell(5,4)
    total_dialogue = dialogue_formated + dialogue_line_indentation+Dialogue_case_corrected_count
    summary_table_row6_col_5.text = str(total_dialogue)
    summary_table_row6_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row6_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER


               #ROW --- 7
    summary_table_row7_col_1 = summary_table.cell(6,0)
    summary_table_row7_col_1.text = "Transitions"
    summary_table_row7_col_1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row7_col_1.paragraphs:
      for run in paragraph.runs:
          run.font.size = Pt(14)

    summary_table_row7_col_2 = summary_table.cell(6,1)
    summary_table_row7_col_2.text = str(transitions_case_corrected_count)
    summary_table_row7_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row7_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row7_col_3 = summary_table.cell(6,2)
    summary_table_row7_col_3.text = str(transitions_line_indentation)
    summary_table_row7_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row7_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row7_col_4 = summary_table.cell(6,3)
    summary_table_row7_col_4.text = str(transitions_formated)
    summary_table_row7_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row7_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    
    summary_table_row7_col_5 = summary_table.cell(6,4)
    total_transition = transitions_formated+transitions_line_indentation+transitions_case_corrected_count
    summary_table_row7_col_5.text = str(total_transition)
    summary_table_row7_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row7_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER


           #ROW --- 8
    summary_table_row8_col_1 = summary_table.cell(7,0)
    summary_table_row8_col_1.text = "Special Terms"
    summary_table_row8_col_1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row8_col_1.paragraphs:
      for run in paragraph.runs:
          run.font.size = Pt(14)

    summary_table_row8_col_2 = summary_table.cell(7,1)
    summary_table_row8_col_2.text = str(st_case_corrected_count)
    summary_table_row8_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row8_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
    
    summary_table_row8_col_3 = summary_table.cell(7,2)
    summary_table_row8_col_3.text = str(st_line_indentation)
    summary_table_row8_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row8_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row8_col_4 = summary_table.cell(7,3)
    summary_table_row8_col_4.text = str(st_formated)
    summary_table_row8_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row8_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row8_col_5 = summary_table.cell(7,4)
    total_special_term = st_formated +st_line_indentation+ st_case_corrected_count
    summary_table_row8_col_5.text = str(total_special_term)
    summary_table_row8_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row8_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER


    #ROW --- 9
    summary_table_row9_col_1 = summary_table.cell(8,0)
    summary_table_row9_col_1.text = "Total"
    summary_table_row9_col_1.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row9_col_1.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            run.font.bold = True


    summary_table_row9_col_2 = summary_table.cell(8,1)
    summary_table_row9_col_2.text = str(sluglinecase_corrected_count+actionlinecase_corrected_count+speakercase_corrected_count+parentheticalcase_corrected_count +Dialogue_case_corrected_count+transitions_case_corrected_count+st_case_corrected_count)
    summary_table_row9_col_2.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row9_col_2.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row9_col_3 = summary_table.cell(8,2)
    summary_table_row9_col_3.text = str(slugline_indentation+actionline_indentation+speaker_indentation+parenthetical_line_indentation+dialogue_line_indentation+transitions_line_indentation+st_line_indentation)
    summary_table_row9_col_3.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row9_col_3.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER

    summary_table_row9_col_4 = summary_table.cell(8,3)
    summary_table_row9_col_4.text = str(slugline_formated+actionline_formated+speaker_formated+parenthetical_formated+dialogue_formated+transitions_formated+st_formated)
    summary_table_row9_col_4.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row9_col_4.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER


    summary_table_row9_col_5 = summary_table.cell(8,4)
    summary_table_row9_col_5.text = str(total_slug+total_action_line+total_speaker+total_parenthetical+total_dialogue+total_transition+total_special_term)
    summary_table_row9_col_5.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER 
    for paragraph in summary_table_row9_col_5.paragraphs:
        for run in paragraph.runs:
            run.font.size = Pt(14)
            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER


#--------------------------- 14-09-2023
    for _ in range(3):
        output_doc.add_paragraph()
   #----------------------- 14-09-23
    para = output_doc.add_paragraph()
    para.alignment = WD_ALIGN_PARAGRAPH.CENTER

# Audit detail at center of the page with bold
    run = para.add_run()
    font = run.font
    font.bold = True
    font.size = Pt(18)
    run.text = ' Audit Details'
    run.add_break()
  # --------------------------  14-09-23

    no_rows = len(audit_df.index)

    table =output_doc.add_table(1, cols =6)
    table.alignment = WD_TABLE_ALIGNMENT.CENTER

    table.style = 'Colorful Shading Accent 6'

    table.autofit = False
    table.columns[0].width = Inches(0.5)
    table.columns[1].width = Inches(1.2)
    table.columns[2].width = Inches(2)
    table.columns[3].width = Inches(1.5)
    table.columns[4].width = Inches(2)
    table.columns[5].width = Inches(2.5)
#     table.columns[3].width = Inches(0.5)


    heading_cells = table.rows[0].cells

    heading_cells[0].width = Inches(0.1)
    heading_cells[1].width = Inches(0.1)
    heading_cells[2].width = Inches(3.5)
    heading_cells[3].width = Inches(0.8)
    heading_cells[4].width = Inches(3.5)
    heading_cells[5].width = Inches(2)

    heading_cells[0].text = 'Line No'
    heading_cells[1].text = 'Audited Line No'
    heading_cells[2].text = 'Current Content'
    heading_cells[3].text = 'Script Element'
    heading_cells[4].text = 'New Content'
    heading_cells[5].text = 'Changes Done'


    for i in range(0,6):
        heading_cells[i].paragraphs[0].runs[0].font.bold = True
        heading_cells[i].paragraphs[0].runs[0].font.size = Pt(9)


#------------------------------->LOGIC HERE<---------------------------------------------
    report_df = pd.DataFrame(columns=['line_no', 'audited_line_no', 'current_content', 'script_element', 'new_content', 'changes_done', 'para_no'])

    for index in audit_df.index:

        columns_to_check = ["line_removed","introduction",	"appendix",	"page_no"	,"left_indent_corrected"	,"right_indent_corrected"	,"line_wrapped_at_prescribed_right_indent",	"case_corrected",	"blank_inserted_before"	,"blank_inserted_after"	,"blank_deleted_before"	,"blank_deleted_after"	,"space_removed_between_characters"	,"space_added_between_characters"	,"line_merged_with_next_line",	"line_broken_into_multiple_lines"	,"punctuation_mark_added"	,"punctuation_mark_removed"	,"language_specific_audit_comments"]
        audit_df[columns_to_check] = audit_df[columns_to_check].fillna('No')
        if audit_df.loc[index, columns_to_check].eq('No').all().all():
            # All columns contain 'No', skip this row
            continue
        elif audit_df['introduction'][index] == 'Yes':
            continue
        elif audit_df['appendix'][index] == 'Yes':
            continue
        elif audit_df['Identification_Status'][index] == 'blank':
            continue
        elif pd.isna(audit_df.loc[index, "Identification_Status"]):
            continue


        para_value = audit_df["para_no"][index]    # ---------------------------------------------><-------------------------
        current_para_value = report_df['para_no'].iloc[-1] if not report_df.empty else None
        if para_value == current_para_value:
            continue
        else:
            # report_df = report_df.append(audit_df.loc[index], ignore_index=True)
            new_row = audit_df.loc[index].to_frame().T
            report_df = pd.concat([report_df, new_row], ignore_index=True)
        print("current_para_value",current_para_value)


        row_index = 1
        old_line_no_index = index
        collection_old_line_no = []
        while old_line_no_index < len(audit_df) and str(audit_df["para_no"][old_line_no_index]) == str(para_value):
            if audit_df['Identification_Status'][old_line_no_index] != "blank":
                try:
                    data = int(old_line_no_index)
                    collection_old_line_no.append(str(data))
                except ValueError:
                    pass
            old_line_no_index += 1

        cells = table.add_row().cells
        cells[0].width = Inches(0.1)
        cells[0].text = ', '.join(collection_old_line_no)

        audited_line_index = index
        #--------------------------------------audited_lino_no------------------
        collection_audited_line_no = []
        while audited_line_index < len(audit_df) and str(audit_df["para_no"][audited_line_index]) == str(para_value):
            if audit_df['Identification_Status'][audited_line_index] != "blank":
                audited_line_no = audit_df['audited_line_no'][audited_line_index]
                try:
                    data = int(audited_line_no)
                    collection_audited_line_no.append(str(data))
                except ValueError:
                    pass
            audited_line_index += 1
        print("collection_audited_line_no", collection_audited_line_no)
        data_string = ', '.join(collection_audited_line_no)
        print("data_string:", data_string)
        cells[1].width = Inches(0.1)
        cells[1].text = data_string


        #------------------------------>OLD DATA<---------------------------------
        data_index = index
        collection_data = []
        while data_index < len(audit_df) and str(audit_df["para_no"][data_index]) == str(para_value):
            cur_data = audit_df['data'][data_index]
            if not pd.isna(cur_data):  # Check if the value is not NaN
                data = str(cur_data).strip()
                collection_data.append(data)
            data_index += 1

        cells[2].width = Inches(3.5)
        data = str(data)
        cells[2].text = '\n '.join(collection_data)

        if audit_df['Identification_Status'][index] == 'blank':
            script_element = 'Blank Line'
        elif audit_df['Identification_Status'][index] == '':
            if audit_df['introduction'][index] == 'Yes':
                script_element = 'Title/Introduction'
            elif audit_df['appendix'][index] == 'Yes':
                script_element = 'Appendix'
            else:
                continue

        else:
            script_element = ps_to_script_element(audit_df['Identification_Status'][index])
        data =  script_element
        cells[3].width = Inches(0.8)
        cells[3].text = data

        collection_new_data = []
        new_data_index = index

        while new_data_index < len(audit_df) and str(audit_df["para_no"][new_data_index]) == str(para_value):
            if audit_df["line_removed"][new_data_index] == "No":
                new_data =  audit_df['data_corrected'][new_data_index]
                if not pd.isna(new_data):  # Check if the value is not NaN
                    data = str(new_data).strip()
                    collection_new_data.append(data)
            new_data_index += 1
        data =  str(new_data).strip()
        cells[4].width = Inches(3.5)
        data = str(data)
        cells[4].text = '\n '.join(collection_new_data)

        sno = 1
        changes_done = False

        # identification_status = audit_df['Identification_Status'][index]
        if pd.isnull(audit_df['Identification_Status'][index]) or audit_df['Identification_Status'][index] == "":
            continue


        if audit_df['left_indent_corrected'][index] != 'No':
            change_comment = audit_df['left_indent_corrected'][index]
            try:
              str_int = change_comment[-2]+change_comment[-1]
            except Exception as e:
              pass
            if  ps_to_script_element(audit_df['Identification_Status'][index]) == "Dialogue":
                if str_int == "15":
                  change_comment = "Dialogue line left index corrected to 1.5 Inch"
                elif str_int == "25":
                  change_comment = "Dialogue line left index corrected to 2.5 Inch"


            if str_int == "15":
              name =   ps_to_script_element(audit_df['Identification_Status'][index])
              change_comment = f"{name} Line left indent corrected to 1.5 Inch"
              print(change_comment)
            elif str_int == "25":
              name =   ps_to_script_element(audit_df['Identification_Status'][index])
              change_commen = f"{name} Left indent corrected to 2.5 Inch"

            elif str_int == "30":
                change_comment = "Parenthetical left indent corrected to 3 Inch"
            elif str_int == "35":
                change_comment = "Speaker left indent corrected to 3.5 Inch"

            data = str(sno) + '. ' + str(change_comment)
            # dataa = data.split()
            # if dataa[-1] == "nan":
            #   continue

            cells[5].width = Inches(2)
            para = cells[5].add_paragraph()
            run = para.add_run()
            run.text = data
            run.add_break()
            sno += 1
            changes_done = True

        if audit_df['right_indent_corrected'][index] != 'No':
            name = ps_to_script_element(audit_df['Identification_Status'][index])
            change_comment = audit_df['right_indent_corrected'][index]
            try:
              str_int = change_comment[-2]+change_comment[-1]
            except Exception as e:
              pass
            if str_int == "10":
              change_comment = f"{name} Line right indent corrected to 1 Inch"


            data = str(sno) + '. ' + str(change_comment)
            # dataa = data.split()
            # if dataa[-1] == "nan":
            #   continue
            cells[5].width = Inches(2)
            para = cells[5].add_paragraph()
            run = para.add_run()
            run.text = data
            run.add_break()
            sno += 1
            changes_done = True

        if audit_df['case_corrected'][index] != 'No':
            name = ps_to_script_element(audit_df['Identification_Status'][index])
            string = str(audit_df['case_corrected'][index])
            string = string.split()
            content = string[-1]
            if content == "AllUpper":
              change_comment = f'{name} Case ' + "Corrected to All Upper"
            elif content == "AllLower":
              change_comment = f'{name} Case ' + "Corrected to All Lowerr"
            if len(str(change_comment)) <= 2 :
                continue
            data = str(sno) + '. ' + str(change_comment)
            # dataa = data.split()
            # if dataa[-1] == "nan":
            #   continue
            cells[5].width = Inches(2)
            para = cells[5].add_paragraph()
            run = para.add_run()
            run.text = data
            run.add_break()
            sno += 1
            changes_done = True

        if audit_df['line_wrapped_at_prescribed_right_indent'][index] != 'No':
            change_comment = 'Line Wrapped at Prescribed Right Indent 1 Inch'
            name = ps_to_script_element(audit_df['Identification_Status'][index])
            if name == "Action":
              change_comment = f'{name} Line Wrapped at Prescribed Right Indent 1 Inch'
            elif name == "Dialogue":
              change_comment = f'{name} Line Wrapped at Prescribed Right Indent 2 Inch'

            data = str(sno) + '. ' + str(change_comment)
            # dataa = data.split()
            # if dataa[-1] == "nan":
            #   continue

            cells[5].width = Inches(2)
            para = cells[5].add_paragraph()
            run = para.add_run()
            run.text = data
            run.add_break()
            sno += 1
            changes_done = True

        if audit_df['line_broken_into_multiple_lines'][index] != 'No':
            name = ps_to_script_element(audit_df['Identification_Status'][index])
            change_comment = f'{name} line Broken into Multiple Lines'

            data = str(sno) + '. ' + str(change_comment)
            # dataa = data.split()
            # if dataa[-1] == "nan":
            #   continue
            cells[5].width = Inches(2)
            para = cells[5].add_paragraph()
            run = para.add_run()
            run.text = data
            run.add_break()
            sno += 1
            changes_done = True

        if audit_df['line_merged_with_next_line'][index] != 'No':
            name = ps_to_script_element(audit_df['Identification_Status'][index])
            change_comment = f'{name} line Merged with Next Line'

            data = str(sno) + '. ' + str(change_comment)
            # dataa = data.split()
            # if dataa[-1] == "nan":
            #   continue
            cells[5].width = Inches(2)
            para = cells[5].add_paragraph()
            run = para.add_run()
            run.text = data
            run.add_break()
            sno += 1
            changes_done = True

        if audit_df['language_specific_audit_comments'][index] != 'No':
            pass
            name = ps_to_script_element(audit_df['Identification_Status'][index])
            change_comment = f"{name}",str(audit_df['language_specific_audit_comments'][index])

            data = str(sno) + '. ' + str(change_comment)

            cells[5].width = Inches(2)
            para = cells[5].add_paragraph()
            run = para.add_run()
            run.text = data
            run.add_break()
            sno += 1
            changes_done = True


        if audit_df['blank_inserted_after'][index] != 'No':
            change_comment = 'A blank line is added below'
            # name = ps_to_script_element(audit_df['Identification_Status'][index])
            # if name == "Action":
            #   change_comment = f'{name}Line Wrapped at Prescribed Right Indent 1 Inch'
            # elif name == "Dialogue":
            #   change_comment = f'{name}Line Wrapped at Prescribed Right Indent 2 Inch'

            data = str(sno) + '. ' + str(change_comment)
            # dataa = data.split()
            # if dataa[-1] == "nan":
            #   continue

            cells[5].width = Inches(2)
            para = cells[5].add_paragraph()
            run = para.add_run()
            run.text = data
            run.add_break()
            sno += 1
            changes_done = True

        if not changes_done:
            continue
            # data = 'No Changes Done'
            # cells[5].width = Inches(2)
            # para = cells[5].add_paragraph()
            # run = para.add_run()
            # run.text = data
            # run.add_break()

        row_index += 1

    buffer = io.BytesIO()
    output_doc.save(buffer)
    buffer.seek(0)


    #output_doc.save(audit_report_tabular_docx)
    return buffer


def replace_dot_with_comma(slugline):
    """Swap full stops for commas inside the location part of a slugline.

    Every ``<prefix><location> - <TIME>`` span found in *slugline* is
    rebuilt as ``<prefix><location> - <TIME>`` with each "." in the
    location replaced by ",".  The optional INT/EXT style prefix is kept
    verbatim, and whatever whitespace surrounded the dash is normalised to
    exactly one space on each side.  Text containing no matching span is
    returned unchanged.
    """
    slug_re = re.compile(
        r'((?:INT\./EXT\. |INT\. |EXT\. |E/I\. |INT |EXT)?)\s*(.*?)\s*-\s*([A-Z\s]+)'
    )

    def _rebuild(found):
        # Group 1 is always present (possibly empty) because only the
        # alternation inside it is optional.
        prefix, place, time_of_day = found.groups()
        return prefix + place.replace(".", ",") + " - " + time_of_day

    return slug_re.sub(_rebuild, slugline)


def change_dot_to_comma_inslug(df):
    """Rewrite the 'content' of every slugline row with dots turned to commas.

    Rows whose 'script_element' equals 'slugline' have their 'content'
    passed through ``replace_dot_with_comma``; the value before and after
    the rewrite is printed.  *df* is modified in place and also returned.
    """
    for label, record in df.iterrows():
        # Only slugline rows are touched; everything else is left as-is.
        if record['script_element'] != 'slugline':
            continue

        before = record['content']
        print(before)
        after = replace_dot_with_comma(before)
        print(after)
        df.loc[label, 'content'] = after

    return df


def fdx_to_audited_df(input_script):
    """Build an audited-script DataFrame from an FDX script file.

    The file at *input_script* is parsed with ``utilities.fdx_to_csv`` and
    reshaped into a frame with the columns ``para_no``, ``scene_no``,
    ``content`` and ``script_element``:

    * element names are mapped to the lower-case vocabulary used here
      (e.g. 'Scene Heading' -> 'slugline', 'Character' -> 'speaker');
    * a 'blank' row with empty content is inserted after every slugline,
      action, dialogue and transition, except after the final row and
      except after a dialogue that is directly followed by a parenthetical
      or another dialogue;
    * slugline, speaker and transition content is upper-cased,
      parenthetical content is lower-cased;
    * rows are numbered from 1 in ``para_no`` and stamped with a running
      ``scene_no``.

    Parameters
    ----------
    input_script : path of the FDX file to read.

    Returns
    -------
    pandas.DataFrame with a fresh 0..n-1 index.
    """
    # Use a context manager so the handle is closed even if parsing raises
    # (the file used to be left open).
    with open(input_script, 'r') as fdx:
        fdx_df = utilities.fdx_to_csv(fdx)

    df = pd.DataFrame(columns=['para_no', 'scene_no', 'content', 'script_element'])

    df['content'] = fdx_df['Text']
    df['script_element'] = fdx_df['Script_Element']

    di = {'Scene Heading':'slugline','Character':'speaker','Parenthetical':'parenthetical','Transition':'transition','Action':'action','Dialogue':'dialogue'}

    df.replace({"script_element": di}, inplace=True)

    ## inserting blanks
    ## after slugline
    ## after action
    ## after dialogue
    ## after transition
    # NOTE(review): the label arithmetic below (index + 1, index + 0.5,
    # index < count - 1) assumes fdx_df carries a default 0..n-1 integer
    # index - confirm against utilities.fdx_to_csv.
    count = len(df)
    # df.index is evaluated once here, so rows appended inside the loop are
    # not visited by it.
    for index in df.index:
        se = df.loc[index, 'script_element']
        if index < (count - 1):

            if se in ('slugline', 'action', 'dialogue', 'transition'):
                # skip parentheticals in between dialogues
                next_se = df.loc[index + 1, 'script_element']
                if not (se == 'dialogue' and next_se in ('parenthetical', 'dialogue')):
                    # A half-step label places the blank row directly after
                    # the current one once the frame is sorted by index.
                    df.loc[index + 0.5] = np.nan
                    df.loc[index + 0.5, 'script_element'] = 'blank'
                    df.loc[index + 0.5, 'content'] = ''

        ## case upper for slugline, character, transition, lower for parenthetical
        ## more elaborate function for slugline is present in sa_functions_english which will have to be repurposed
        if se in ('slugline', 'speaker', 'transition'):
            df.loc[index, 'content'] = str(df.loc[index, 'content']).upper()
        if se == 'parenthetical':
            df.loc[index, 'content'] = str(df.loc[index, 'content']).lower()

    df = df.sort_index().reset_index(drop=True)

    ## add para_no and scene_no
    para_no = 1
    scene_no = 1

    for index in df.index:
        # Single-step .loc writes instead of chained df[col][index] = ...,
        # which warns on current pandas and does not write through to df
        # when Copy-on-Write is enabled.
        df.loc[index, 'para_no'] = para_no
        df.loc[index, 'scene_no'] = scene_no

        # The counter is bumped after the slugline row itself has been
        # stamped, so a slugline keeps the number that was current before it.
        if df.loc[index, 'script_element'] == 'slugline':
            scene_no += 1

        para_no += 1

    return df