77 lines
3.4 KiB
Python
77 lines
3.4 KiB
Python
import csv
|
||
import pandas as pd
|
||
import os
|
||
from openai import OpenAI
|
||
import sys
|
||
|
||
|
||
|
||
|
||
def classify_lines(input_file_path, audit_ai_csv) -> list[str]:
|
||
from scriptAudit.utils import remove_empty_content,remove_leading_numbers, remove_numeric_only_content,remove_emptyline_rows,merge_consecutive_action_lines,merge_consecutive_dialogue_lines
|
||
from scriptAudit.utils import insert_blank_lines,add_fade_in_out,remove_asterisks, merge_consecutive_action_lines_new,extract_labeled_lines, remove_trailing_speaker
|
||
|
||
with open(input_file_path, "r") as f:
|
||
raw_lines = [line.strip() for line in f.readlines() if line.strip()]
|
||
|
||
chunked_results = []
|
||
client = OpenAI(api_key=os.getenv('openai_key'))
|
||
prompt = (
|
||
"You are a screenplay assistant. For each line below, classify it using one of these labels: "
|
||
"slugline, speaker, dialogue, action, parenthetical, transition. Return each line followed by its label in curly braces.\n\n"
|
||
)
|
||
for i in range(0, len(raw_lines), 20):
|
||
chunk = raw_lines[i:i + 20]
|
||
final_prompt = prompt + "\n".join(chunk)
|
||
response = client.responses.create(
|
||
model="gpt-4o",
|
||
input=[
|
||
{
|
||
"role": "developer",
|
||
"content": """You are a screenplay auditor. For each line below, classify it using one of these labels:
|
||
slugline, speaker, dialogue, action, parenthetical, transition, special_term, title. Return each line followed by its label in curly braces.\n\n
|
||
**Examples:**
|
||
INT. ROOM – NIGHT {slugline}
|
||
KITCHEN – DAY {slugline}
|
||
JOHN {speaker}
|
||
(quietly) {parenthetical}
|
||
JOHN (O.S.) {speaker}
|
||
JOHN (angrily) {speaker}
|
||
I knew you’d come. {dialogue}
|
||
She turns away from the window. {action}
|
||
FADE OUT. {transition}
|
||
THE END {title}
|
||
(V.O.) {special_term}
|
||
John CONT'D {speaker}"""
|
||
},
|
||
{
|
||
"role": "user",
|
||
"content": "I need you to classify the lines below. Please provide the classification in the format: 'line {label}'\n\n" + "\n".join(chunk)
|
||
}
|
||
]
|
||
)
|
||
|
||
classified = response.output_text.splitlines()
|
||
chunked_results.extend(classified)
|
||
extracted = extract_labeled_lines(chunked_results)
|
||
with open(audit_ai_csv, mode='w', newline='', encoding='utf-8') as f:
|
||
writer = csv.writer(f)
|
||
writer.writerow(["content", "script_element"])
|
||
writer.writerows(extracted)
|
||
print("Classification completed.")
|
||
|
||
df = pd.read_csv(audit_ai_csv)
|
||
df = remove_empty_content(df)
|
||
df = remove_asterisks(df)
|
||
df = remove_leading_numbers(df)
|
||
df = remove_numeric_only_content(df)
|
||
df = remove_emptyline_rows(df)
|
||
df = remove_trailing_speaker(df)
|
||
# df = merge_consecutive_action_lines(df)
|
||
df = merge_consecutive_action_lines_new(df)
|
||
df = merge_consecutive_dialogue_lines(df)
|
||
df = insert_blank_lines(df)
|
||
|
||
df = add_fade_in_out(df)
|
||
|
||
return df |