Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/awSRT.py

import sys
import time
import json
from os import path

# Absolute path of the subtitling directory on the host this was written for.
# NOTE(review): in the visible code this is referenced only by the
# commented-out CLI wiring below — confirm whether other modules import it.
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"

# Appears to be an earlier command-line entry point (disabled):
# filenames = sys.argv[1]
# movie_path = sys.argv[2]
# current = basepath + "/"+movie_path


def _format_srt_time(t):
    """Convert a seconds string such as ``"12.34"`` to ``"00:00:12,340"``.

    The fractional part is padded or truncated to exactly three digits, as
    the SRT timestamp format requires milliseconds. A value without a
    decimal point is treated as whole seconds. Hours are not wrapped at 24.
    """
    whole, _, fraction = str(t).partition('.')
    hours, rest = divmod(int(whole or 0), 3600)
    minutes, seconds = divmod(rest, 60)
    millis = fraction[:3].ljust(3, '0')
    return f"{hours:02d}:{minutes:02d}:{seconds:02d},{millis}"


def _new_chunk():
    """Return an empty subtitle chunk accumulator."""
    return {
        'start_time': '',
        'end_time': '',
        'word_index': 1,
        'sentence': '',
    }


def _chunk_items(items, word_break_limit=11):
    """Group transcript items into subtitle chunks.

    A chunk is closed when the current item is sentence-ending punctuation
    (``.``, ``?``, ``!``), when the chunk's word index reaches
    ``word_break_limit`` and the next item is not punctuation, or when the
    last item is reached.

    Each item must provide ``type`` and ``alternatives[0]['content']``;
    items of type ``"pronunciation"`` must also provide ``start_time`` and
    ``end_time`` as strings holding a number of seconds.
    """
    chunks = []
    chunk = _new_chunk()
    last_word_end_time = None
    last_index = len(items) - 1

    for i, item in enumerate(items):
        item_type = item['type']
        content = item['alternatives'][0]['content']

        if item_type == "pronunciation":
            item_start_time = item['start_time']

            # Timestamps are strings; compare them numerically, otherwise
            # "9.8" < "10.2" is False and overlapping cues slip through.
            if last_word_end_time and \
                    float(item_start_time) < float(last_word_end_time):
                item_start_time = last_word_end_time

            if chunk['start_time'] == "":
                chunk['start_time'] = item_start_time

            chunk['end_time'] = item['end_time']
            # Don't want to start a fresh sentence with a space
            spacer = '' if chunk['word_index'] == 1 else ' '
            chunk['sentence'] += spacer + content
            chunk['word_index'] += 1
            last_word_end_time = item['end_time']

        elif item_type == "punctuation":
            # Punctuation attaches to the sentence without counting as a word.
            chunk['sentence'] += content

        item_is_ending_punctuation = content in ('.', '?', '!')
        next_item_is_punctuation = (
            i < last_index and items[i + 1]['type'] == "punctuation")
        # Never break on the word limit right before punctuation, so the
        # punctuation stays attached to the word it follows.
        hit_word_break_limit = (
            chunk['word_index'] >= word_break_limit
            and not next_item_is_punctuation)
        is_last_item = i == last_index

        if item_is_ending_punctuation or hit_word_break_limit or is_last_item:
            if chunk['start_time'] != "":
                chunks.append(chunk)
            elif chunks and chunk['sentence']:
                # A chunk with no words has no timing and cannot be its own
                # cue; keep its punctuation on the previous cue instead.
                chunks[-1]['sentence'] += chunk['sentence']
            # A wordless chunk with no previous cue is dropped: there is no
            # timestamp to display it at.
            chunk = _new_chunk()

    return chunks


def aws_final(current):
    """Convert ``<current>/output.json`` into ``<current>/a_subtitles.srt``.

    Reads the transcript items from ``data['results']['items']`` of the JSON
    file, groups them into subtitle cues, writes the cues in SRT format and
    also prints the SRT text to stdout.

    Args:
        current: Directory containing ``output.json``; the ``.srt`` file is
            written to the same directory.

    Raises:
        SystemExit: If ``output.json`` does not exist in ``current``.
    """
    print("$"*50)

    filename = current + '/output.json'

    if not path.exists(filename):
        sys.exit('File ' + filename + ' does not exist.')

    with open(filename, encoding='utf8') as f:
        data = json.load(f)

    chunks = _chunk_items(data['results']['items'])

    # Build out srt: index line, time range line, text, blank separator.
    srt = ''.join(
        f"{index}\n"
        f"{_format_srt_time(cue['start_time'])} --> "
        f"{_format_srt_time(cue['end_time'])}\n"
        f"{cue['sentence']}\n\n"
        for index, cue in enumerate(chunks, start=1)
    )

    # Context manager guarantees the file is flushed and closed.
    with open(rf"{current}/a_subtitles.srt", "w", encoding='utf8') as out:
        out.write(srt)

    print(srt)
first commit 2024-04-27 09:33:09 +00:00			`import sys`
			`import time`
			`import json`
			`from os import path`

			`basepath = "/home/user/mnf/project/MNF/conversion/subtitling"`

			`# filenames = sys.argv[1]`
			`# movie_path = sys.argv[2]`
			`# current = basepath + "/"+movie_path`


			`def aws_final(current):`

			`print("$"*50)`

			`def formatTime(t):`
			`seconds, remainder_of_seconds = t.split('.')`
			`result = time.strftime('%H:%M:%S', time.gmtime(int(seconds)))`

			`return result + "," + remainder_of_seconds.ljust(3, '0')`

			`chunks = []`
			`chunk = {`
			`'start_time': '',`
			`'end_time': '',`
			`'word_index': 1,`
			`'sentence': ''`
			`}`
			`word_break_limit = 11`

			`# if len(sys.argv) < 2:`
			`# sys.exit('Please provide a file name.')`

			`filename = current + '/output.json'`

			`if not path.exists(filename):`
			`sys.exit('File ' + filename + ' does not exist.')`

			`with open(filename, encoding='utf8') as f:`
			`data = json.load(f)`

			`items = data['results']['items']`

			`last_word_end_time = None`

			`for i, item in enumerate(items):`
			`type = item['type']`
			`content = item['alternatives'][0]['content']`

			`if type == "pronunciation":`
			`item_start_time = item['start_time']`

			`if last_word_end_time and item_start_time < last_word_end_time:`
			`item_start_time = last_word_end_time`

			`if chunk['start_time'] == "":`
			`chunk['start_time'] = item_start_time`

			`chunk['end_time'] = item['end_time']`
			`# Don't want to start a fresh sentence with a space`
			`spacer = '' if chunk['word_index'] == 1 else ' '`
			`chunk['sentence'] = chunk['sentence'] + spacer + content`
			`chunk['word_index'] = chunk['word_index'] + 1`
			`last_word_end_time = item['end_time']`

			`elif type == "punctuation":`
			`# Add punctuation`
			`# But don't increment index`
			`chunk['sentence'] = chunk['sentence'] + content`

			`# - we hit the word break limit (provided the NEXT item is not punctuation)`
			`item_is_ending_punctuation = content == '.' or content == '?' or content == '!'`
			`next_item_is_punctuation = i < len(`
			`items) - 1 and items[i+1]['type'] == "punctuation"`
			`hit_word_break_limit = chunk['word_index'] >= word_break_limit and not next_item_is_punctuation`
			`is_last_item = len(items) - 1 == i`

			`if item_is_ending_punctuation or hit_word_break_limit or is_last_item:`

			`chunks.append(chunk)`
			`chunk = {`
			`'start_time': '',`
			`'end_time': '',`
			`'word_index': 1,`
			`'sentence': ''`
			`}`

			`srt = ''`
			`# Build out srt`
			`for i, chunk in enumerate(chunks):`
			`chunk_index = str(i + 1)`
			`srt = srt + chunk_index + "\n"`
			`srt = srt + formatTime(chunk['start_time']) + " --> " + \`
			`formatTime(chunk['end_time']) + "\n" + chunk['sentence'] + "\n\n"`

			`# sys.stdout = open(rf"{current}/aws_subtitle.srt", "w", encoding='utf8')`
			`# aws_subtitle_file = open(`
			`# rf"{current}/aws_subtitle.srt", "w", encoding='utf8').write`
			`open(rf"{current}/a_subtitles.srt", "w", encoding='utf8').write(srt)`
			`# aws_subtitle_file.write(srt)`

			`print(srt)`