Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/azure_srt.py

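"""Generate an SRT subtitle file from a WAV track using the Azure Speech SDK.

Runs continuous speech recognition over ``wav_file``, keeps the word-level
timestamps from the highest-confidence alternative of each result, groups the
words into roughly 3-second subtitle cues, and writes them to
``<current>/az_subtitles.srt``.
"""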
import azure.cognitiveservices.speech as speechsdk
import os
import time
import json
import srt
import datetime
import sys
import moviepy.editor as mp  # only needed by the commented-out UI entry point below
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
# filename2 = sys.argv[1]
# lang_code = sys.argv[2]
# movie_name = sys.argv[3]
# #lang_code = sys.argv[1]
# current = basepath + "/" + movie_name
# filename1 = os.path.splitext(filename2)[0]
# temp = basepath+"/"+filename2
# def azure_sub(filename2, lang_code, current):
# # my_clip = mp.VideoFileClip(filename2) # uncomment when running from UI
# # # my_clip = mp.VideoFileClip(temp) # comment when running from command prompt
# # with open(rf"{current}/azure_subtitle.srt", "w") as f:
# # temp = current+"/"+filename1+".wav"
# # temp1 = filename1+".wav"
# # my_clip.audio.write_audiofile(temp)
# temp = current + "/" + filename2
# path = os.getcwd()
# Creates an instance of a speech config with specified subscription key and service region.
# Replace with your own subscription key and region identifier from here: https://aka.ms/speech/sdkregion
def azure_sub_temp(wav_file, lang_code, current):
    global done
    # path = os.getcwd()
    wave_file_path = current + "/" + wav_file
    done = False
    print("done flag initialised:", done)

    # Creates an instance of a speech config with the specified subscription key and service region.
    # Replace with your own subscription key and region identifier from here: https://aka.ms/speech/sdkregion
    speech_key, service_region = "<>", "<>"  # e.g. service_region = "eastus"
    speech_config = speechsdk.SpeechConfig(
        subscription=speech_key, region=service_region)

    # Creates an audio configuration that points to an audio file.
    # Replace with your own audio filename.
    audio_input = speechsdk.audio.AudioConfig(filename=wave_file_path)

    # Creates a recognizer with the given settings: detailed output with word-level timestamps.
    speech_config.speech_recognition_language = str(lang_code)
    speech_config.request_word_level_timestamps()
    speech_config.enable_dictation()
    speech_config.output_format = speechsdk.OutputFormat.Detailed
    speech_recognizer = speechsdk.SpeechRecognizer(
        speech_config=speech_config, audio_config=audio_input)
    # result = speech_recognizer.recognize_once()
    all_results = []
    results = []
    transcript = []
    words = []

    # https://docs.microsoft.com/en-us/python/api/azure-cognitiveservices-speech/azure.cognitiveservices.speech.recognitionresult?view=azure-python
    def handle_final_result(evt):
        # Collect the recognized text plus the word-level timing data from the
        # highest-confidence NBest alternative of each final result.
        all_results.append(evt.result.text)
        results = json.loads(evt.result.json)
        transcript.append(results['DisplayText'])
        confidence_list_temp = [item.get('Confidence')
                                for item in results['NBest']]
        max_confidence_index = confidence_list_temp.index(
            max(confidence_list_temp))
        words.extend(results['NBest'][max_confidence_index]['Words'])
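    # For reference, evt.result.json (Detailed output format with word-level
    # timestamps enabled) has roughly this shape; the values below are
    # illustrative, and Offset/Duration are in 100-nanosecond ticks:
    # {
    #   "DisplayText": "Hello world.",
    #   "NBest": [
    #     {
    #       "Confidence": 0.97,
    #       "Display": "Hello world.",
    #       "Words": [
    #         {"Word": "hello", "Offset": 5000000, "Duration": 3000000},
    #         {"Word": "world", "Offset": 8500000, "Duration": 4500000}
    #       ]
    #     }
    #   ]
    # }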
    def stop_cb(evt):
        # Signal that continuous recognition has finished.
        global done
        print('CLOSING on {}'.format(evt))
        speech_recognizer.stop_continuous_recognition()
        done = True

    speech_recognizer.recognized.connect(handle_final_result)
    # Connect callbacks to the events fired by the speech recognizer
    speech_recognizer.recognizing.connect(
        lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(
        lambda evt: print('RECOGNIZED: {}'.format(evt)))
    speech_recognizer.session_started.connect(
        lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(
        lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(
        lambda evt: print('CANCELED {}'.format(evt)))

    # Stop continuous recognition on either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start recognition and block until stop_cb flips the done flag.
    print("done flag before recognition loop:", done)
    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)

    print("Printing all results:")
    print(all_results)
    speech_to_text_response = words

    def convertduration(t):
        # Azure reports Offset/Duration in 100-nanosecond ticks; convert to
        # whole seconds plus the remaining microseconds for datetime.timedelta.
        milliseconds = t / 10000
        return int(milliseconds // 1000), int((milliseconds % 1000) * 1000)
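    # Example: an Offset of 123450000 ticks is 12345 ms, so convertduration
    # returns (12, 345000), i.e. 12 s 345000 us into the audio track.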
    # -- Code to create the subtitle cues -- #
    # Each cue covers a bin of (at most) 3 seconds of speech.
    bin = 3
    duration = 0
    transcriptions = []
    transcript = ""
    index, prev = 0, 0
    wordstartsec, wordstartmicrosec = 0, 0
    for i in range(len(speech_to_text_response)):
        # Forms the sentence until the bin size condition is met
        transcript = transcript + " " + speech_to_text_response[i]["Word"]
        # Checks whether the elapsed duration is less than the bin size
        if int(duration / 10000000) < bin:
            wordstartsec, wordstartmicrosec = convertduration(
                speech_to_text_response[i]["Offset"])
            duration = duration + speech_to_text_response[i]["Offset"] - prev
            prev = speech_to_text_response[i]["Offset"]
        else:
            # Close the current cue and start a new one.
            index = index + 1
            transcriptions.append(srt.Subtitle(
                index,
                datetime.timedelta(0, wordstartsec, wordstartmicrosec),
                datetime.timedelta(0, wordstartsec + bin, 0),
                transcript))
            duration = 0
            transcript = ""

    # Flush whatever is left over as the final cue.
    transcriptions.append(srt.Subtitle(
        index, datetime.timedelta(0, wordstartsec, wordstartmicrosec),
        datetime.timedelta(0, wordstartsec + bin, 0), transcript))

    subtitles = srt.compose(transcriptions)
    with open(rf"{current}/az_subtitles.srt", "w") as f:
        f.write(subtitles)
# wav = "audio_404.wav"
# lang = "en-US"
# loc = "/home/user/mnf/project/MNF/conversion/subtitling"
# azure_sub_temp(wav, lang, loc)
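

# A minimal command-line entry point, sketched from the commented example above.
# The argument order (wav file name, language code, working directory) mirrors
# azure_sub_temp's parameters; it is an assumed invocation, not part of the
# original pipeline.
if __name__ == "__main__":
    wav_arg, lang_arg, loc_arg = sys.argv[1], sys.argv[2], sys.argv[3]
    azure_sub_temp(wav_arg, lang_arg, loc_arg)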