import sys
import time
import json
from os import path

basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
# filenames = sys.argv[1]
# movie_path = sys.argv[2]
# current = basepath + "/"+movie_path

# A cue is closed once its 1-based word index reaches this value, i.e. after
# 10 words (unless the next item is punctuation that belongs to the cue).
_WORD_BREAK_LIMIT = 11

# Punctuation marks that always terminate the current cue.
_ENDING_PUNCTUATION = ('.', '?', '!')


def _format_srt_time(t):
    """Convert a seconds timestamp such as ``"12.3"`` to ``HH:MM:SS,mmm``."""
    # partition() tolerates timestamps that have no fractional part.
    seconds, _, fraction = str(t).partition('.')
    clock = time.strftime('%H:%M:%S', time.gmtime(int(seconds)))
    # SRT needs exactly three millisecond digits: truncate, then zero-pad.
    return clock + "," + fraction[:3].ljust(3, '0')


def _new_chunk():
    """Return an empty subtitle chunk (one future SRT cue)."""
    return {
        'start_time': '',
        'end_time': '',
        'word_index': 1,
        'sentence': '',
    }


def _build_chunks(items):
    """Group transcript items into subtitle chunks.

    A chunk is closed when the current item's content is ending punctuation,
    when the word limit is reached (and the next item is not punctuation),
    or when the last item has been consumed.
    """
    chunks = []
    chunk = _new_chunk()
    last_word_end_time = None
    last_index = len(items) - 1

    for i, item in enumerate(items):
        item_type = item['type']
        content = item['alternatives'][0]['content']

        if item_type == "pronunciation":
            item_start_time = item['start_time']
            # Timestamps are compared numerically: comparing them as strings
            # would order "10.0" before "9.5" and wrongly clamp the start.
            if (last_word_end_time is not None
                    and float(item_start_time) < float(last_word_end_time)):
                item_start_time = last_word_end_time

            if chunk['start_time'] == "":
                chunk['start_time'] = item_start_time
            chunk['end_time'] = item['end_time']

            # Don't want to start a fresh sentence with a space
            spacer = '' if chunk['word_index'] == 1 else ' '
            chunk['sentence'] = chunk['sentence'] + spacer + content
            chunk['word_index'] = chunk['word_index'] + 1
            last_word_end_time = item['end_time']
        elif item_type == "punctuation":
            # Punctuation is glued to the sentence without counting as a word.
            chunk['sentence'] = chunk['sentence'] + content

        next_item_is_punctuation = (
            i < last_index and items[i + 1]['type'] == "punctuation"
        )
        hit_word_break_limit = (
            chunk['word_index'] >= _WORD_BREAK_LIMIT
            and not next_item_is_punctuation
        )

        if (content in _ENDING_PUNCTUATION
                or hit_word_break_limit
                or i == last_index):
            if chunk['start_time'] != '':
                chunks.append(chunk)
            elif chunks:
                # A fragment without any word has no timing of its own;
                # attach its punctuation to the previous cue instead of
                # emitting a cue whose timestamps cannot be formatted.
                chunks[-1]['sentence'] += chunk['sentence']
            chunk = _new_chunk()

    return chunks


def aws_final(current: str) -> None:
    """Convert ``<current>/output.json`` into ``<current>/a_subtitles.srt``.

    The JSON file is expected to hold the transcript items under
    ``data['results']['items']``; each item has a ``type`` and
    ``alternatives[0]['content']``, and "pronunciation" items also carry
    ``start_time`` / ``end_time`` in seconds. The generated SRT text is
    written to disk and echoed to stdout.

    Args:
        current: Directory containing ``output.json``; the subtitle file is
            written into the same directory.

    Raises:
        SystemExit: If ``<current>/output.json`` does not exist.
    """
    print("$" * 50)

    filename = current + '/output.json'
    if not path.exists(filename):
        sys.exit('File ' + filename + ' does not exist.')

    with open(filename, encoding='utf8') as f:
        data = json.load(f)

    chunks = _build_chunks(data['results']['items'])

    # Build out srt: index line, timing line, text, blank separator.
    srt = "".join(
        str(index) + "\n"
        + _format_srt_time(chunk['start_time']) + " --> "
        + _format_srt_time(chunk['end_time']) + "\n"
        + chunk['sentence'] + "\n\n"
        for index, chunk in enumerate(chunks, start=1)
    )

    # Context manager guarantees the subtitle file is flushed and closed.
    with open(rf"{current}/a_subtitles.srt", "w", encoding='utf8') as out:
        out.write(srt)

    print(srt)