Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/awSRT.py

104 lines
3.3 KiB
Python
Raw Normal View History

2024-04-27 09:33:09 +00:00
import sys
import time
import json
from os import path
# Absolute directory of the subtitling module on the original host.
# NOTE(review): aws_final() below does not reference this constant (it takes
# its working directory as an argument); the hard-coded path looks
# machine-specific — confirm whether anything else still depends on it.
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
# Commented-out code that derived `current` from command-line arguments:
# filenames = sys.argv[1]
# movie_path = sys.argv[2]
# current = basepath + "/"+movie_path
def _format_srt_time(t):
    """Convert a ``"seconds.fraction"`` value into an SRT timestamp.

    Args:
        t: Offset in seconds as a decimal string such as ``"12.34"``.
            A value without a fractional part (``"12"``) is accepted too.

    Returns:
        The offset formatted as ``HH:MM:SS,mmm``.
    """
    whole, _, fraction = str(t).partition('.')
    hours, remainder = divmod(int(whole or 0), 3600)
    minutes, seconds = divmod(remainder, 60)
    # SRT needs exactly three millisecond digits: pad short fractions and
    # truncate longer ones.
    millis = fraction[:3].ljust(3, '0')
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis}"


def _new_chunk():
    """Return an empty subtitle chunk; ``word_index`` is 1-based."""
    return {'start_time': '', 'end_time': '', 'word_index': 1, 'sentence': ''}


def _build_chunks(items, word_break_limit):
    """Group transcript items into subtitle chunks.

    A chunk is closed when the current item is ``.``, ``?`` or ``!``, when
    ``word_index`` reaches ``word_break_limit`` (unless the next item is
    punctuation, which still belongs to this chunk), or on the last item.
    """
    chunks = []
    chunk = _new_chunk()
    last_word_end_time = None
    last_index = len(items) - 1

    for i, item in enumerate(items):
        item_type = item['type']
        content = item['alternatives'][0]['content']

        if item_type == "pronunciation":
            item_start_time = item['start_time']
            # Times are decimal strings, so compare them numerically; a plain
            # string comparison would order "10.01" before "9.98".
            if last_word_end_time and \
                    float(item_start_time) < float(last_word_end_time):
                item_start_time = last_word_end_time
            if chunk['start_time'] == "":
                chunk['start_time'] = item_start_time
            chunk['end_time'] = item['end_time']
            # Don't want to start a fresh sentence with a space
            spacer = '' if chunk['word_index'] == 1 else ' '
            chunk['sentence'] += spacer + content
            chunk['word_index'] += 1
            last_word_end_time = item['end_time']
        elif item_type == "punctuation":
            # Add punctuation, but don't increment the word index
            chunk['sentence'] += content

        item_is_ending_punctuation = content in ('.', '?', '!')
        next_item_is_punctuation = (
            i < last_index and items[i + 1]['type'] == "punctuation")
        hit_word_break_limit = (
            chunk['word_index'] >= word_break_limit
            and not next_item_is_punctuation)

        if item_is_ending_punctuation or hit_word_break_limit \
                or i == last_index:
            if chunk['start_time'] != "":
                chunks.append(chunk)
            elif chunk['sentence'] and chunks:
                # The chunk holds punctuation only (e.g. "!" right after
                # "?"), so it has no timestamps to render; keep the text by
                # attaching it to the previous chunk. With no previous chunk
                # the stray punctuation is dropped.
                chunks[-1]['sentence'] += chunk['sentence']
            chunk = _new_chunk()

    return chunks


def _render_srt(chunks):
    """Render chunks as SRT text: index, time range, sentence, blank line."""
    return "".join(
        f"{index}\n"
        f"{_format_srt_time(chunk['start_time'])} --> "
        f"{_format_srt_time(chunk['end_time'])}\n"
        f"{chunk['sentence']}\n\n"
        for index, chunk in enumerate(chunks, start=1)
    )


def aws_final(current, word_break_limit=11):
    """Convert ``<current>/output.json`` into ``<current>/a_subtitles.srt``.

    The JSON file must provide ``results.items``, where every item has a
    ``type`` ("pronunciation" or "punctuation") and
    ``alternatives[0].content``; pronunciation items also carry
    ``start_time`` / ``end_time`` as decimal-second strings.
    NOTE(review): this looks like the Amazon Transcribe output layout —
    confirm against the producer of output.json.

    The generated SRT text is also printed to stdout.

    Args:
        current: Directory containing ``output.json``; the subtitle file is
            written into the same directory.
        word_break_limit: A chunk is closed once its 1-based word counter
            reaches this value, i.e. after ``word_break_limit - 1`` words.

    Raises:
        SystemExit: If ``output.json`` does not exist in ``current``.
    """
    print("$" * 50)

    filename = current + '/output.json'
    if not path.exists(filename):
        sys.exit('File ' + filename + ' does not exist.')

    with open(filename, encoding='utf8') as f:
        data = json.load(f)

    chunks = _build_chunks(data['results']['items'], word_break_limit)
    srt = _render_srt(chunks)

    # Use a context manager so the subtitle file is flushed and closed
    # deterministically.
    with open(f"{current}/a_subtitles.srt", "w", encoding='utf8') as srt_file:
        srt_file.write(srt)

    print(srt)