657 lines
19 KiB
Python
657 lines
19 KiB
Python
|
from io import IOBase, StringIO
|
||
|
import os,csv
|
||
|
import subprocess
|
||
|
from centralisedFileSystem.models import File, ScreenPlay, Script , BeatSheet
|
||
|
|
||
|
from bs4 import BeautifulSoup as bfs
|
||
|
from docx import Document
|
||
|
from docx.enum.text import WD_ALIGN_PARAGRAPH
|
||
|
from docx.shared import Inches, Mm, Pt
|
||
|
import pandas as pd
|
||
|
# import pdftotext
|
||
|
from pdf2docx import parse
|
||
|
from tika import parser
|
||
|
|
||
|
import page_script.models as ps_models
|
||
|
from django.conf import settings
|
||
|
from django.template.loader import render_to_string
|
||
|
from django.utils.html import strip_tags
|
||
|
from django.core.mail import EmailMultiAlternatives
|
||
|
from .filesystem import get_file_path
|
||
|
|
||
|
def fdx_to_csv(fdx_file : IOBase) -> pd.DataFrame:
    """
    Converts the fdx document to a Pandas DataFrame.
    Give FileIO as input, Not file path.

    Every ``Paragraph`` element that carries a ``Type`` attribute becomes one
    row. Paragraphs without that attribute are skipped. The text of a row is
    the stripped content of all nested ``Text`` elements joined by a single
    space.

    Args:
        fdx_file (IOBase): fdx File object

    Returns:
        pd.DataFrame: frame with the columns ``Text`` and ``Script_Element``
    """
    soup = bfs(fdx_file.read(), 'xml')

    # Rows are collected in a plain list and the frame is built once at the
    # end; concatenating a one-row frame per paragraph copies the whole frame
    # on every iteration.
    rows = []
    for para in soup.find_all('Paragraph'):
        # Tag.get returns None for a missing attribute, which replaces the
        # former bare ``except`` around ``para['Type']``.
        script_element = para.get('Type')
        if script_element is None:
            continue

        pieces = [text.get_text().strip() for text in para.find_all('Text')]
        rows.append((" ".join(pieces).strip(), script_element))

    return pd.DataFrame(rows, columns=['Text', 'Script_Element'])
|
||
|
|
||
|
|
||
|
def fdx_to_txt(fdx_file : IOBase) -> str:
    """
    Renders the fdx document as plain text, indenting each element with
    leading spaces.
    Give FileIO as input, Not file path.

    Args:
        fdx_file (IOBase): fdx File object

    Returns:
        str: string generated from given fdx
    """
    # Number of leading spaces per element type; any other type gets 15.
    padding = {
        'Character': 35,
        'Dialogue': 25,
        'Parenthetical': 30,
        'Transition': 55,
    }

    frame = fdx_to_csv(fdx_file)
    final_row = len(frame) - 1

    with StringIO() as out:
        for row_no, text, element in frame.itertuples():
            out.write(' ' * padding.get(element, 15) + text)

            # Nothing is appended after the last row.
            if row_no >= final_row:
                continue
            # Only these element types can be followed by a line break.
            if element not in ('Dialogue', 'Action', 'Scene Heading', 'Transition'):
                continue
            # A dialogue directly followed by more dialogue or a
            # parenthetical gets no line break.
            if element == 'Dialogue' and frame['Script_Element'][row_no + 1] in ('Dialogue', 'Parenthetical'):
                continue

            out.write('\n')

        return out.getvalue()
|
||
|
|
||
|
|
||
|
|
||
|
def csv_to_docx(csv: pd.DataFrame) -> Document:
    """
    Builds a docx document from a script frame, one paragraph per row.

    The document uses Courier New 12pt on a page 297 mm high and 8.57 inches
    wide with a 1.5 inch left margin. Each row's ``script_element`` selects
    the paragraph indents; ``slugline`` and ``speaker`` text is upper-cased
    and ``transition`` paragraphs are right-aligned. Rows marked ``blank``
    leave an empty paragraph.

    Args:
        csv (pd.DataFrame): frame with ``script_element`` and ``content`` columns

    Returns:
        Document: the generated docx document
    """
    # (left, right) paragraph indents in inches, per script element.
    indents = {
        "slugline": (0, 0),
        "action": (0, 0),
        "dialogue": (1.0, 1.25),
        "parenthetical": (1.5, 2.25),
        "speaker": (2, 1),
        "transition": (2.5, 0),
        "special_term": (0, 0),
    }
    # Elements whose text is upper-cased, with the trace line printed for each.
    uppercased = {
        "slugline": "content is slugline",
        "speaker": "content is speaker",
    }

    document = Document()

    base_font = document.styles["Normal"].font
    base_font.name = "Courier New"
    base_font.size = Pt(12)

    page = document.sections[0]
    page.page_height = Mm(297)
    page.page_width = Inches(8.57)
    page.left_margin = Inches(1.5)

    for row_label in csv.index:
        paragraph = document.add_paragraph()

        layout = paragraph.paragraph_format
        layout.space_before = Pt(0)
        layout.space_after = Pt(0)
        layout.line_spacing = Pt(12)

        element = csv["script_element"][row_label]
        text = csv["content"][row_label]
        print("content = ",text)

        # The paragraph has already been added, so a blank row stays empty.
        if element == "blank":
            continue

        if element == "transition":
            paragraph.alignment = WD_ALIGN_PARAGRAPH.RIGHT

        if element in indents:
            left, right = indents[element]
            layout.left_indent = Inches(left)
            layout.right_indent = Inches(right)

        if element in uppercased:
            print(uppercased[element])
            try:
                text = text.upper()
            except Exception as exp:
                # Content without ``upper`` (e.g. a float cell) is left as is.
                print("Exception =", exp)

        # Float content is written as an empty string.
        if isinstance(text, float):
            text = ""

        paragraph.text = text

    return document
|
||
|
|
||
|
|
||
|
def get_csv_from_id(script_id : str, encoding : str ="utf-8") -> pd.DataFrame:
    """
    Loads the csv that belongs to a script, preferring the audited version.

    ``<stem>_audited.csv`` is tried first and ``<stem>.csv`` second, where
    ``<stem>`` is the stored file name without its last extension.

    Args:
        script_id (str): id passed to ``MNFScriptDatabase_2.get_file_path``
        encoding (str): encoding tried first; ``utf-16`` is used when decoding
            with it raises ``UnicodeError``

    Returns:
        pd.DataFrame: content of the csv file

    Raises:
        FileNotFoundError: if neither csv file exists
    """
    f_path, f_name = ps_models.MNFScriptDatabase_2.get_file_path(script_id)
    stem = f_name.rsplit('.', 1)[0]

    # Both candidates are built from the stem. Deriving the fallback with
    # str.replace on the full path would also rewrite a directory or stem
    # that happens to contain "_audited.csv".
    path = f"{f_path}{stem}_audited.csv"
    if not os.path.exists(path):
        path = f"{f_path}{stem}.csv"
        if not os.path.exists(path):
            raise FileNotFoundError(f"Not found : {path}")

    try:
        return pd.read_csv(path, encoding=encoding)
    except UnicodeError:
        return pd.read_csv(path, encoding="utf-16")
|
||
|
|
||
|
def get_csv_path_from_id(script_id : str, encoding : str ="utf-8") -> str:
    """
    Returns the path of the csv that belongs to a script, preferring the
    audited version.

    ``<stem>_audited.csv`` is tried first and ``<stem>.csv`` second, where
    ``<stem>`` is the stored file name without its last extension.

    Args:
        script_id (str): id passed to ``MNFScriptDatabase_2.get_file_path``
        encoding (str): not used; kept so existing callers keep working

    Returns:
        str: path of the existing csv file

    Raises:
        FileNotFoundError: if neither csv file exists
    """
    f_path, f_name = ps_models.MNFScriptDatabase_2.get_file_path(script_id,"csv")
    stem = f_name.rsplit('.', 1)[0]

    # Both candidates are built from the stem. Deriving the fallback with
    # str.replace on the full path would also rewrite a directory or stem
    # that happens to contain "_audited.csv".
    path = f"{f_path}{stem}_audited.csv"
    if not os.path.exists(path):
        path = f"{f_path}{stem}.csv"
        if not os.path.exists(path):
            raise FileNotFoundError(f"Not found : {path}")

    return path
|
||
|
|
||
|
|
||
|
def json_to_csv(json_obj: dict) -> pd.DataFrame:
    """
    Flattens an editor document into one row per paragraph.

    ``json_obj["content"]`` is a list of scene nodes; scene numbers start at 1
    and scenes without a ``content`` key contribute no rows. Paragraph numbers
    start at 1 and run across all scenes. For a ``blank`` paragraph the
    content is ``None``; otherwise it is the ``text`` of the paragraph's first
    content node, or ``""`` when the paragraph has no content.

    Within a run of consecutive ``blank`` rows only the first one is kept. The
    dropped rows leave gaps in ``para_no`` and in the index.

    Args:
        json_obj (dict): document with a ``content`` list of scene nodes

    Returns:
        pd.DataFrame: frame with the columns ``para_no``, ``scene_no``,
            ``content`` and ``script_element``
    """
    columns = ["para_no", "scene_no", "content", "script_element"]

    # All rows are accumulated and the frame is built once. Assigning a fresh
    # one-row frame inside the loop kept only the last paragraph.
    rows = []
    repeated_blanks = []
    previous_blank = False
    para = 1

    for scene_no, scene in enumerate(json_obj["content"], start=1):
        if "content" not in scene:
            continue

        for paragraph in scene["content"]:
            se = paragraph["attrs"]["scriptElement"]
            is_blank = se == "blank"

            if is_blank:
                cn = None
            elif paragraph.get("content"):
                cn = paragraph["content"][0]["text"]
            else:
                cn = ""

            # len(rows) is the index label this row is about to receive.
            if is_blank and previous_blank:
                repeated_blanks.append(len(rows))
            previous_blank = is_blank

            rows.append({"para_no": para, "scene_no": scene_no, "content": cn, "script_element": se})
            para += 1

    df = pd.DataFrame(rows, columns=columns)
    return df.drop(index=repeated_blanks)
|
||
|
|
||
|
|
||
|
def csv_to_json(csv: pd.DataFrame) -> str:
    """
    Serialises a frame to a JSON string using ``orient="index"``.

    Args:
        csv (pd.DataFrame): frame to serialise

    Returns:
        str: JSON text produced by ``DataFrame.to_json``
    """
    return csv.to_json(orient="index")
|
||
|
|
||
|
|
||
|
def docx_to_pdf(doc_path : str, path : str) -> str:
    """
    Runs ``soffice --convert-to pdf`` on a document and returns the expected
    path of the resulting pdf.

    The exit status of ``soffice`` is not checked, so the returned path is
    where the pdf is expected to be, not a guarantee that it exists.

    Args:
        doc_path (str): path of the document to convert
        path (str): output directory handed to ``--outdir``

    Returns:
        str: ``path`` + ``/`` + the document's file name with its last
            extension replaced by ``.pdf``
    """
    subprocess.call(
        [
            "soffice",
            # '--headless',
            "--convert-to",
            "pdf",
            "--outdir",
            path,
            doc_path,
        ]
    )

    # [-1] instead of [1]: a doc_path without any '/' (a bare file name in the
    # working directory) used to raise IndexError here.
    stem = doc_path.rsplit('/', 1)[-1].rsplit('.', 1)[0]

    return path + '/' + stem + ".pdf"
|
||
|
|
||
|
|
||
|
def get_plain_text(script_id: str = None, path : str = None) -> str:
    """
    Returns the plain-text content of a script file, chosen by its extension.

    The file is taken from ``path`` when it is given; otherwise it is looked
    up through ``MNFScriptDatabase_2.get_file_path(script_id)``.

    * ``txt``  - the file opened in binary mode is returned as is.
      NOTE(review): this is a file object rather than the annotated ``str``;
      it is kept because callers may read from it, and the caller must close
      it. Confirm against the call sites.
    * ``pdf``  - extraction is currently disabled, an empty string is returned.
    * ``docx`` - the ``content`` entry of tika's ``parser.from_file`` result.

    Args:
        script_id (str): id used for the lookup when ``path`` is not given
        path (str): direct path of the file; takes precedence over ``script_id``

    Returns:
        str: the extracted text (see the note on ``txt`` above)

    Raises:
        TypeError: if the file extension is not supported
    """
    if path:
        f_name = path
    else:
        f_path, f_name = ps_models.MNFScriptDatabase_2.get_file_path(script_id)
        path = f"{f_path}{f_name}"

    name = str(f_name)

    # The file is opened before the extension is inspected so that a missing
    # file is still reported first, for every extension.
    input_file = open(path, "rb")
    if name.endswith("txt"):
        return input_file
    # No other branch uses the handle; close it instead of leaking it.
    input_file.close()

    if name.endswith("pdf"):
        # pdftotext based extraction was commented out on 08-2-24.
        return ""

    if name.endswith("docx"):
        return parser.from_file(path)["content"]

    raise TypeError(
        f"conversion of {name.rsplit('.', maxsplit=1)[-1]} files not supported"
    )
|
||
|
|
||
|
|
||
|
def pdf_to_docx(input_script : str, output_converted_docx : str) -> None:
    """
    Converts a pdf to docx by calling pdf2docx's ``parse``.

    Args:
        input_script (str): first argument handed to ``parse`` (the pdf)
        output_converted_docx (str): second argument handed to ``parse`` (the docx)
    """
    # Passed through unchanged; presumably "from the first page to the end".
    page_window = {"start": 0, "end": None}
    parse(input_script, output_converted_docx, **page_window)
|
||
|
|
||
|
def send_email_to_user(user,screenplay_name,subject,message):# removed flag = 1
    """
    Sends an HTML email with a plain-text alternative to ``user.email``.

    The body is rendered from the ``audit/coree_email.html`` template; the
    plain-text part is the same markup with its tags stripped. A full stop is
    appended to the subject.

    Args:
        user: object with an ``email`` attribute; also passed to the template as ``Name``
        screenplay_name: passed to the template as ``story_name``
        subject: subject line without the trailing full stop
        message: passed to the template as ``message``
    """
    subject_line = subject + "."
    sender = settings.EMAIL_HOST_USER
    recipient = user.email

    html_content = render_to_string(
        "audit/coree_email.html",
        {
            "Name": user,
            "story_name": screenplay_name,
            "message" : message,
        },
    )

    email = EmailMultiAlternatives(
        subject_line,
        strip_tags(html_content),
        sender,
        [recipient],
    )
    email.attach_alternative(html_content, "text/html")
    email.send()
|
||
|
|
||
|
|
||
|
# def screen_play_details(script_id):
|
||
|
|
||
|
# """
|
||
|
# This Function will extract the details in the CSV
|
||
|
# No of Dialogues
|
||
|
# No of ActionLine
|
||
|
# No of Scene
|
||
|
# No of Speakers
|
||
|
# Speakers
|
||
|
|
||
|
# """
|
||
|
# print("Entering the ScreenPlay Details extraction")
|
||
|
# file_name = get_file_path(script_id, "script-csv")
|
||
|
|
||
|
# num_actions = 0
|
||
|
# num_dialogues = 0
|
||
|
# num_slugline = 0
|
||
|
# num_locations= set()
|
||
|
# num_speakers = set()
|
||
|
# num_tranisitions = 0
|
||
|
# num_parenthetical = 0
|
||
|
# num_special_terms = 0
|
||
|
# num_INT = 0
|
||
|
# num_EXT = 0
|
||
|
# longest_dialogue_len = 0
|
||
|
# longest_dialogue_speaker = "None"
|
||
|
# dialogue_count = {
|
||
|
|
||
|
# }
|
||
|
# with open(file_name, "r") as file:
|
||
|
# reader = csv.reader(file)
|
||
|
# header = next(reader)
|
||
|
|
||
|
# for row in reader:
|
||
|
# para_no, scene_no, content, script_element = row
|
||
|
|
||
|
# if script_element == "action":
|
||
|
# num_actions += 1
|
||
|
|
||
|
# if script_element == "dialogue":
|
||
|
# dialogue_count[content]+=1
|
||
|
# if longest_dialogue_len <len(content):
|
||
|
# longest_dialogue_speaker = content
|
||
|
# longest_dialogue_len = max(len(content),longest_dialogue_len)
|
||
|
# num_dialogues += 1
|
||
|
|
||
|
# if script_element == "slugline":
|
||
|
# if content[0:3]=="INT":
|
||
|
# num_INT+=1
|
||
|
# if content[0:3]=="EXT":
|
||
|
# num_EXT+=1
|
||
|
# num_slugline += 1
|
||
|
# num_locations.add(content)
|
||
|
|
||
|
# if script_element == "parenthetical":
|
||
|
# num_parenthetical += 1
|
||
|
|
||
|
# if script_element == "transition":
|
||
|
# num_tranisitions += 1
|
||
|
|
||
|
# if script_element == "special_term":
|
||
|
# num_special_terms += 1
|
||
|
|
||
|
# if script_element == "speaker":
|
||
|
# num_speakers.add(content)
|
||
|
|
||
|
# max_dialogue_speaker = max(dialogue_count, key=lambda word: dialogue_count[word])
|
||
|
# max_dialouge_count = dialogue_count[max_dialogue_speaker]
|
||
|
# num_speaker = len(num_speakers)
|
||
|
# details = {
|
||
|
# "action": num_actions,
|
||
|
# "dialogues":num_dialogues,
|
||
|
# "scenes": num_dialogues,
|
||
|
# "num_of_speaker": num_speaker,
|
||
|
# "special_term" : num_special_terms,
|
||
|
# "parenthetical" : num_parenthetical,
|
||
|
# "transition" : num_tranisitions,
|
||
|
# "speakers": list(num_speakers),
|
||
|
# "max_dialogue_speaker" : max_dialogue_speaker,
|
||
|
# "max_dialouge_count" : max_dialouge_count,
|
||
|
# "longest_dialogue_speaker" : longest_dialogue_speaker,
|
||
|
# "longest_dialogue_len" : longest_dialogue_len,
|
||
|
# "num_of_locations" : len(num_locations)
|
||
|
|
||
|
# }
|
||
|
# # print("Number of action lines:", num_actions)
|
||
|
# # print("Number of dialogues:", num_dialogues)
|
||
|
# # print("Number of scenes:", num_slugline)
|
||
|
# # print("Number of speaker:", len(num_speaker))
|
||
|
# return details
|
||
|
|
||
|
import csv
|
||
|
|
||
|
def estimate_page_count(word_count, words_per_page=100):
    """
    Estimates the page count as ``word_count / words_per_page``.

    Args:
        word_count: number of words in the script
        words_per_page: words assumed per page; adjust to the script formatting

    Returns:
        the quotient, which may be fractional
    """
    pages = word_count / words_per_page
    return pages
|
||
|
|
||
|
def screen_play_details(script_id):
    """
    Collects statistics from the script csv of ``script_id``.

    The csv is expected to hold a header row followed by rows of
    ``para_no, scene_no, content, script_element``. Empty rows are skipped.
    A dialogue is attributed to the most recent ``speaker`` row before it.

    Args:
        script_id: id passed to ``get_file_path(script_id, "script-csv")``

    Returns:
        dict: counts per script element, the speakers and locations, the
            speaker with the most dialogues, the longest dialogue (length,
            speaker, scene), INT/EXT slugline counts and the total word count.
            ``interval_at``, ``lock_status``, ``no_of_pages`` and ``gpt_count``
            are fixed placeholder values. The speaker entries are the string
            ``"None"`` when no attributed dialogue exists.
    """
    print("Entering the ScreenPlay Details extraction")
    file_name = get_file_path(script_id, "script-csv")

    element_counts = dict.fromkeys(
        ("action", "dialogue", "slugline", "parenthetical", "transition", "special_term"),
        0,
    )
    num_locations = set()
    num_speakers = set()
    dialogue_count = {}  # speaker name -> number of dialogues
    current_speaker = None  # content of the latest "speaker" row
    num_INT = 0
    num_EXT = 0
    longest_dialogue_len = 0
    longest_dialogue_scene = 0
    longest_dialogue_speaker = "None"
    total_word_count = 0

    # Placeholders: the database lookups for these values are disabled.
    gpt_count = 0
    interval_at = 0
    lock_status = False
    no_of_pages = 0

    with open(file_name, "r") as file:
        reader = csv.reader(file)
        next(reader, None)  # skip the header; an empty file yields no rows

        for row in reader:
            if not row:
                continue
            para_no, scene_no, content, script_element = row

            if script_element in element_counts:
                element_counts[script_element] += 1

            if script_element == "dialogue":
                # Count per speaker. Testing the dialogue text against the
                # speaker-keyed dict reset every count to 1.
                if current_speaker is not None:
                    dialogue_count[current_speaker] = dialogue_count.get(current_speaker, 0) + 1

                if len(content) > longest_dialogue_len:
                    longest_dialogue_len = len(content)
                    longest_dialogue_scene = scene_no
                    # A dialogue before any speaker row has no speaker.
                    longest_dialogue_speaker = "None" if current_speaker is None else current_speaker

            elif script_element == "slugline":
                if content.startswith("INT"):
                    num_INT += 1
                elif content.startswith("EXT"):
                    num_EXT += 1
                num_locations.add(content)

            elif script_element == "speaker":
                current_speaker = content
                num_speakers.add(content)

            # split() without an argument ignores empty strings, so blank
            # content and repeated spaces no longer count as words.
            total_word_count += len(content.split())

    # default= keeps scripts without any attributed dialogue from raising.
    max_dialogue_speaker = max(dialogue_count, key=dialogue_count.get, default="None")
    max_dialogue_count = dialogue_count.get(max_dialogue_speaker, 0)

    details = {
        "interval_at": interval_at,
        "lock_status": lock_status,
        "no_of_pages": no_of_pages,
        "action": element_counts["action"],
        "dialogues": element_counts["dialogue"],
        "scenes": element_counts["slugline"],
        "num_of_speakers": len(num_speakers),
        "special_term": element_counts["special_term"],
        "parenthetical": element_counts["parenthetical"],
        "transition": element_counts["transition"],
        "speakers": list(num_speakers),
        "max_dialogue_speaker": max_dialogue_speaker,
        "max_dialogue_count": max_dialogue_count,
        "longest_dialogue_speaker": longest_dialogue_speaker,
        "longest_dialogue_len": longest_dialogue_len,
        "longest_dialogue_scene" : longest_dialogue_scene,
        "num_of_locations": len(num_locations),
        "locations": list(num_locations),
        "num_INT" : num_INT,
        "num_EXT" : num_EXT,
        "gpt_count": gpt_count,
        "total_word_count": total_word_count,
    }

    return details
|
||
|
def json_to_csv_scriptpad(json_obj: list) -> pd.DataFrame:
    """
    Flattens a list of editor paragraphs into one row per paragraph.

    Paragraph numbers start at 1. For a ``blank`` paragraph the content is
    ``None``; otherwise it is the ``text`` of the paragraph's first content
    node, or ``""`` when the paragraph has no content.

    Within a run of consecutive ``blank`` rows only the first one is kept. The
    dropped rows leave gaps in ``para_no`` and in the index.

    Args:
        json_obj (list): paragraph nodes, each with ``attrs.scriptElement``

    Returns:
        pd.DataFrame: frame with the columns ``para_no``, ``content`` and
            ``script_element``
    """
    columns = ["para_no", "content", "script_element"]

    # All rows are accumulated and the frame is built once. Assigning a fresh
    # one-row frame inside the loop kept only the last paragraph.
    rows = []
    repeated_blanks = []
    previous_blank = False

    for para, paragraph in enumerate(json_obj, start=1):
        se = paragraph["attrs"]["scriptElement"]
        is_blank = se == "blank"

        if is_blank:
            cn = None
        elif paragraph.get("content"):
            cn = paragraph["content"][0]["text"]
        else:
            cn = ""

        # len(rows) is the index label this row is about to receive.
        if is_blank and previous_blank:
            repeated_blanks.append(len(rows))
        previous_blank = is_blank

        rows.append({"para_no": para, "content": cn, "script_element": se})

    df = pd.DataFrame(rows, columns=columns)
    return df.drop(index=repeated_blanks)
|
||
|
|
||
|
# def Screen_Play_status_calculator(script_id):
|
||
|
|
||
|
# """
|
||
|
# This Function will extract the details in the CSV
|
||
|
# No of Pages
|
||
|
# No of Words
|
||
|
# No of Dialogues
|
||
|
# No of ActionLine
|
||
|
# No of Scene
|
||
|
# No of Speakers
|
||
|
# Speakers
|
||
|
# Chatgpt Use Count
|
||
|
# Longest Dialogue
|
||
|
# """
|
||
|
# print("Entering the ScreenPlay Details extraction")
|
||
|
# file_name = get_file_path(script_id, "script-csv")
|
||
|
# docx_file = get_file_path(script_id, "script-docx")
|
||
|
# doc = docx.Document(docx_file)
|
||
|
|
||
|
# # Initialize counters for paragraphs and lines
|
||
|
# num_paragraphs = 0
|
||
|
# num_lines = 0
|
||
|
|
||
|
# # Iterate through paragraphs and count lines
|
||
|
# for paragraph in doc.paragraphs:
|
||
|
# num_paragraphs += 1
|
||
|
# num_lines += len(paragraph.text.split('\n'))
|
||
|
# words_per_page = 250
|
||
|
# pages = num_lines / words_per_page
|
||
|
# num_actions = 0
|
||
|
# num_dialogues = 0
|
||
|
# num_slugline = 0
|
||
|
# num_speakers = set()
|
||
|
# num_scene = 0
|
||
|
|
||
|
# with open(file_name, "r") as file:
|
||
|
# reader = csv.reader(file)
|
||
|
# header = next(reader)
|
||
|
|
||
|
# for row in reader:
|
||
|
# para_no, scene_no, content, script_element = row
|
||
|
|
||
|
# if script_element == "action":
|
||
|
# num_actions += 1
|
||
|
|
||
|
# if script_element == "dialogue":
|
||
|
# num_dialogues += 1
|
||
|
|
||
|
# if script_element == "slugline":
|
||
|
# num_slugline += 1
|
||
|
|
||
|
# if script_element == "speaker":
|
||
|
# num_speakers.add(content)
|
||
|
|
||
|
# # if scene_no != num_scene:
|
||
|
# # num_scene+=1
|
||
|
|
||
|
# num_speaker = len(num_speakers)
|
||
|
# details = {
|
||
|
# "action": num_actions,
|
||
|
# "dialogues":num_dialogues,
|
||
|
# "scenes": num_dialogues,
|
||
|
# "num_of_speaker": num_speaker,
|
||
|
# "speakers": list(num_speakers)
|
||
|
# }
|
||
|
# # print("Number of action lines:", num_actions)
|
||
|
# # print("Number of dialogues:", num_dialogues)
|
||
|
# # print("Number of scenes:", num_slugline)
|
||
|
# # print("Number of speaker:", len(num_speaker))
|
||
|
# return details
|