Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/google_srt.py

import os
import srt
import sys
from google.cloud import speech
from google.cloud import storage
import moviepy.editor as mp
import time
import random


# Directory of the subtitling package on the deployment host.
# NOTE(review): not referenced by the code visible in this file; presumably
# kept for the commented-out command-line path below -- confirm before removing.
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"

# Point the Google client libraries at the service-account key file. This runs
# at import time, so it overrides any value already present in the environment.
# NOTE(review): hard-coded absolute path to a credentials file -- consider
# supplying it through deployment configuration instead.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "/home/user/mnf/project/MNF/conversion/subtitling/gifted-mountain-318504-0a5f94cda0c8.json"
)


# filename2 = sys.argv[1]
# movie_name = sys.argv[3]

# filename1 = os.path.splitext(filename2)[0]

# temp = basepath+"/"+filename2

# print(temp)


def google_sub(filename2, lang_code, vid_path):
    """Transcribe an audio file with Google Speech-to-Text and write an SRT.

    The file ``vid_path/filename2`` is uploaded to the ``mnf_subtitle`` Cloud
    Storage bucket, transcribed through a long-running recognition request
    (configured for 2-channel LINEAR16 audio at 44100 Hz), and the resulting
    subtitles are written to ``vid_path/g_subtitles.srt``.

    The uploaded object is left in the bucket: the clean-up call at the end
    of the function is intentionally still commented out.

    Args:
        filename2: Name of the audio file inside ``vid_path``; it is also
            used as the object name in the bucket.
        lang_code: Language code for the recognizer (converted with ``str``).
        vid_path: Directory that holds the audio file and receives the SRT.

    Returns:
        None. The result is the SRT file written to disk.
    """
    # Recognition and output settings shared by the nested helpers below.
    sample_rate_hertz = 44100
    language_code = str(lang_code)
    audio_channel_count = 2
    encoding = "LINEAR16"
    out_file = "g_subtitles"
    max_chars = 40  # soft limit on characters (spaces excluded) per subtitle

    def long_running_recognize(uri):
        """Transcribe the audio at ``uri`` and return a list of subtitles."""
        client = speech.SpeechClient()

        operation = client.long_running_recognize(
            config={
                "enable_word_time_offsets": True,
                "enable_automatic_punctuation": True,
                "sample_rate_hertz": sample_rate_hertz,
                "language_code": language_code,
                "audio_channel_count": audio_channel_count,
                "encoding": encoding,
            },
            # Use the argument rather than the enclosing variable so the
            # helper really transcribes whatever URI it is given.
            audio={"uri": uri},
        )
        # Blocks until the recognition operation has completed.
        response = operation.result()

        subs = []
        for result in response.results:
            # Skip results without any alternative instead of raising
            # IndexError and losing the whole transcript.
            if not result.alternatives:
                continue
            subs = break_sentences(subs, result.alternatives[0])

        print("Transcribing finished")
        return subs

    def del_blob(bucket_name, directory_name):
        """Delete every object in ``bucket_name`` whose name starts with
        ``directory_name``."""
        storage_client = storage.Client()
        bucket = storage_client.get_bucket(bucket_name)
        # list all objects in the directory
        for blob in bucket.list_blobs(prefix=directory_name):
            blob.delete()

    def break_sentences(subs, alternative):
        """Split the words of ``alternative`` into subtitles appended to ``subs``.

        A subtitle is closed when a word contains ".", "!" or "?", when the
        accumulated character count exceeds ``max_chars``, or when a word
        that is not the first of the subtitle contains ",". Words left over
        after the last break are emitted as a final subtitle.
        """
        firstword = True
        charcount = 0
        idx = len(subs) + 1
        content = ""
        start = None
        end = None

        for w in alternative.words:
            if firstword:
                start = w.start_time
            # Remember the latest end time so a trailing fragment can be
            # closed after the loop.
            end = w.end_time

            charcount += len(w.word)
            content += " " + w.word.strip()

            if (
                any(mark in w.word for mark in ".!?")
                or charcount > max_chars
                or ("," in w.word and not firstword)
            ):
                subs.append(
                    srt.Subtitle(
                        index=idx,
                        start=start,
                        end=end,
                        content=srt.make_legal_content(content),
                    )
                )
                firstword = True
                idx += 1
                content = ""
                charcount = 0
            else:
                firstword = False

        # Previously any words after the last break were silently dropped;
        # flush them so the end of the transcript is not lost.
        if content:
            subs.append(
                srt.Subtitle(
                    index=idx,
                    start=start,
                    end=end,
                    content=srt.make_legal_content(content),
                )
            )
        return subs

    def write_srt(subs):
        """Write ``subs`` as an SRT file inside ``vid_path``."""
        srt_file = out_file + ".srt"
        print("Writing {} subtitles to: {}".format(language_code, srt_file))
        # The context manager closes the file even if composing/writing fails.
        with open(f"{vid_path}/{srt_file}", "w", encoding="utf-8") as f:
            f.write(srt.compose(subs))

    def write_txt(subs):
        """Write the plain text of ``subs``, one subtitle per line.

        The file is created relative to the current working directory.
        """
        txt_file = out_file + ".txt"
        print("Writing text to: {}".format(txt_file))
        # Explicit UTF-8 so non-ASCII transcripts do not depend on the locale.
        with open(txt_file, "w", encoding="utf-8") as f:
            for s in subs:
                f.write(s.content.strip() + "\n")

    wav_path = vid_path + "/" + filename2

    client = storage.Client()
    print("Success 1")
    bucket = client.get_bucket("mnf_subtitle")
    blob = bucket.blob(filename2)

    # NOTE(review): these are private module attributes of google.cloud.storage;
    # they are overridden here to cap upload chunk sizes at 2 MB -- confirm the
    # installed library version still honours them.
    storage.blob._DEFAULT_CHUNKSIZE = 2097152  # 1024 * 1024 B * 2 = 2 MB
    storage.blob._MAX_MULTIPART_SIZE = 2097152  # 2 MB

    with open(wav_path, "rb") as audio_file:
        blob.upload_from_file(audio_file)

    # The recognizer reads the audio straight from the bucket.
    storage_uri = "gs://mnf_subtitle/" + filename2
    print(storage_uri)

    subs = long_running_recognize(storage_uri)

    print(subs)

    write_srt(subs)
    # del_blob("mnf_subtitle", filename2)
first commit 2024-04-27 09:33:09 +00:00			`import os`
			`import srt`
			`import sys`
			`from google.cloud import speech`
			`from google.cloud import storage`
			`import moviepy.editor as mp`
			`import time`
			`import random`


			`os.environ[`
			`"GOOGLE_APPLICATION_CREDENTIALS"`
			`] = "/home/user/mnf/project/MNF/conversion/subtitling/gifted-mountain-318504-0a5f94cda0c8.json"`

			`basepath = "/home/user/mnf/project/MNF/conversion/subtitling"`


			`# filename2 = sys.argv[1]`
			`# movie_name = sys.argv[3]`

			`# filename1 = os.path.splitext(filename2)[0]`

			`# temp = basepath+"/"+filename2`

			`# print(temp)`


			`def google_sub(filename2, lang_code, vid_path):`

			`# my_clip = mp.VideoFileClip(filename2) # uncomment when running from UI`
			`# # my_clip = mp.VideoFileClip(temp) # comment when running from command prompt`

			`# random_num = random.randint(0, 1000)`
			`# temp = vid_path + "/audio_" + str(random_num) + ".wav"`
			`# temp1 = "audio_" + str(random_num)+".wav"`
			`# print(temp1)`
			`# my_clip.audio.write_audiofile(temp)`
			`wav_path = vid_path + "/" + filename2`

			`# time.sleep(60)`
			`client = storage.Client()`
			`print("Success 1")`
			`bucket = client.get_bucket("mnf_subtitle")`
			`blob = bucket.blob(filename2)`

			`storage.blob._DEFAULT_CHUNKSIZE = 2097152 # 1024 * 1024 B * 2 = 2 MB`
			`storage.blob._MAX_MULTIPART_SIZE = 2097152 # 2 MB`

			`with open(wav_path, "rb") as photo:`
			`blob.upload_from_file(photo)`
			`# time.sleep(60)`

			`sample_rate_hertz = 44100`
			`language_code = str(lang_code)`
			`audio_channel_count = 2`
			`encoding = "LINEAR16"`
			`out_file = "g_subtitles"`
			`max_chars = 40`
			`# in place of course.wav we need to put bucket.blob('')`
			`storage_uri = "gs://mnf_subtitle/" + filename2`
			`print(storage_uri)`

			`def long_running_recognize(uri):`

			`client = speech.SpeechClient()`

			`operation = client.long_running_recognize(`
			`config={`
			`"enable_word_time_offsets": True,`
			`"enable_automatic_punctuation": True,`
			`"sample_rate_hertz": sample_rate_hertz,`
			`"language_code": language_code,`
			`"audio_channel_count": audio_channel_count,`
			`"encoding": encoding,`
			`},`
			`audio={"uri": storage_uri},`
			`)`
			`response = operation.result()`

			`subs = []`

			`for result in response.results:`

			`subs = break_sentences(subs, result.alternatives[0])`

			`print("Transcribing finished")`
			`return subs`

			`def del_blob(bucket_name, directory_name):`
			`storage_client = storage.Client()`
			`bucket = storage_client.get_bucket(bucket_name)`
			`# list all objects in the directory`
			`blobs = bucket.list_blobs(prefix=directory_name)`
			`for blob in blobs:`
			`blob.delete()`

			`def break_sentences(subs, alternative):`
			`firstword = True`
			`charcount = 0`
			`idx = len(subs) + 1`
			`content = ""`

			`for w in alternative.words:`
			`if firstword:`

			`start = w.start_time`

			`charcount += len(w.word)`
			`content += " " + w.word.strip()`

			`if (`
			`"." in w.word`
			`or "!" in w.word`
			`or "?" in w.word`
			`or charcount > max_chars`
			`or ("," in w.word and not firstword)`
			`):`

			`subs.append(`
			`srt.Subtitle(`
			`index=idx,`
			`start=start,`
			`# end=w.end_time.ToTimedelta(),`
			`end=w.end_time,`
			`content=srt.make_legal_content(content),`
			`)`
			`)`
			`firstword = True`
			`idx += 1`
			`content = ""`
			`charcount = 0`
			`else:`
			`firstword = False`
			`return subs`

			`def write_srt(subs):`
			`srt_file = out_file + ".srt"`
			`print("Writing {} subtitles to: {}".format(language_code, srt_file))`
			`f = open(rf"{vid_path}/{srt_file}", "w", encoding="utf-8")`
			`f.writelines(srt.compose(subs))`
			`f.close()`
			`return`

			`def write_txt(subs):`
			`txt_file = out_file + ".txt"`
			`print("Writing text to: {}".format(txt_file))`
			`f = open(txt_file, "w")`
			`for s in subs:`
			`f.write(s.content.strip() + "\n")`
			`f.close()`
			`return`

			`subs = long_running_recognize(storage_uri)`

			`print(subs)`

			`write_srt(subs)`
			`# del_blob("mnf_subtitle", filename2)`