import os
import random
import sys
import time

import moviepy.editor as mp
import srt
from google.cloud import speech
from google.cloud import storage

# Service-account key used by the Google Cloud client libraries.
os.environ[
    "GOOGLE_APPLICATION_CREDENTIALS"
] = "/home/user/mnf/project/MNF/conversion/subtitling/gifted-mountain-318504-0a5f94cda0c8.json"
basepath = "/home/user/mnf/project/MNF/conversion/subtitling"

# filename2 = sys.argv[1]
# movie_name = sys.argv[3]
# filename1 = os.path.splitext(filename2)[0]
# temp = basepath+"/"+filename2
# print(temp)


def google_sub(filename2, lang_code, vid_path):
    """Upload an audio file to Cloud Storage, transcribe it and write an SRT file.

    The audio is uploaded to the ``mnf_subtitle`` bucket under the object name
    ``filename2``, transcribed with the Speech-to-Text long-running API, split
    into subtitle cues and written to ``<vid_path>/g_subtitles.srt``.

    Args:
        filename2: Name of the audio file; used both as the object name in the
            bucket and as the file name looked up inside ``vid_path``.
        lang_code: Language code passed to the recognizer (converted with
            ``str``).
        vid_path: Directory holding the audio file and receiving the ``.srt``
            output.

    Returns:
        None. The result is the subtitle file written to disk.
    """
    # my_clip = mp.VideoFileClip(filename2)  # uncomment when running from UI
    # # my_clip = mp.VideoFileClip(temp)  # comment when running from command prompt
    # random_num = random.randint(0, 1000)
    # temp = vid_path + "/audio_" + str(random_num) + ".wav"
    # temp1 = "audio_" + str(random_num)+".wav"
    # print(temp1)

    # The original left this assignment commented out while still reading
    # `wav_path` below, which raised NameError on every call.
    # NOTE(review): assumes the audio file lives at <vid_path>/<filename2>,
    # as the previously commented-out assignment indicated - confirm.
    wav_path = os.path.join(vid_path, filename2)
    # time.sleep(60)

    client = storage.Client()
    print("Success 1")
    bucket = client.get_bucket("mnf_subtitle")
    blob = bucket.blob(filename2)
    storage.blob._DEFAULT_CHUNKSIZE = 2097152  # 1024 * 1024 B * 2 = 2 MB
    storage.blob._MAX_MULTIPART_SIZE = 2097152  # 2 MB
    with open(wav_path, "rb") as audio_file:
        blob.upload_from_file(audio_file)
    # time.sleep(60)

    # Recognition settings sent to the API; the uploaded audio is expected to
    # match them (44.1 kHz, 2 channels, LINEAR16).
    sample_rate_hertz = 44100
    language_code = str(lang_code)
    audio_channel_count = 2
    encoding = "LINEAR16"
    out_file = "g_subtitles"
    max_chars = 40  # a cue is closed once its word characters exceed this

    # in place of course.wav we need to put bucket.blob('')
    storage_uri = "gs://mnf_subtitle/" + filename2
    print(storage_uri)

    def long_running_recognize(uri):
        """Transcribe the audio at ``uri`` and return a list of subtitles."""
        speech_client = speech.SpeechClient()
        operation = speech_client.long_running_recognize(
            config={
                "enable_word_time_offsets": True,
                "enable_automatic_punctuation": True,
                "sample_rate_hertz": sample_rate_hertz,
                "language_code": language_code,
                "audio_channel_count": audio_channel_count,
                "encoding": encoding,
            },
            # Use the argument rather than the enclosing `storage_uri` so the
            # function honours the URI it is given.
            audio={"uri": uri},
        )
        response = operation.result()
        subs = []
        for result in response.results:
            # Only the first alternative of each result is used.
            subs = break_sentences(subs, result.alternatives[0])
        print("Transcribing finished")
        return subs

    def del_blob(bucket_name, directory_name):
        """Delete every object in ``bucket_name`` whose name starts with ``directory_name``."""
        storage_client = storage.Client()
        target_bucket = storage_client.get_bucket(bucket_name)
        # list all objects in the directory
        for stale_blob in target_bucket.list_blobs(prefix=directory_name):
            stale_blob.delete()

    def break_sentences(subs, alternative):
        """Append subtitle cues built from ``alternative.words`` to ``subs``.

        A cue is closed on a word containing ``.``, ``!`` or ``?``, on a word
        containing ``,`` that is not the first word of the cue, or once the
        accumulated word length exceeds ``max_chars``.

        Returns:
            The same ``subs`` list, extended in place.
        """
        firstword = True
        charcount = 0
        idx = len(subs) + 1
        content = ""
        start = None
        last_end = None
        for w in alternative.words:
            if firstword:
                start = w.start_time
            charcount += len(w.word)
            content += " " + w.word.strip()
            last_end = w.end_time
            if (
                "." in w.word
                or "!" in w.word
                or "?" in w.word
                or charcount > max_chars
                or ("," in w.word and not firstword)
            ):
                subs.append(
                    srt.Subtitle(
                        index=idx,
                        start=start,
                        # end=w.end_time.ToTimedelta(),
                        end=w.end_time,
                        content=srt.make_legal_content(content),
                    )
                )
                firstword = True
                idx += 1
                content = ""
                charcount = 0
            else:
                firstword = False
        # Flush words left over after the last break; previously a transcript
        # that did not end on punctuation lost its final words.
        if content.strip():
            subs.append(
                srt.Subtitle(
                    index=idx,
                    start=start,
                    end=last_end,
                    content=srt.make_legal_content(content),
                )
            )
        return subs

    def write_srt(subs):
        """Write ``subs`` as an SRT file inside ``vid_path``."""
        srt_file = out_file + ".srt"
        print("Writing {} subtitles to: {}".format(language_code, srt_file))
        # `srt.compose` returns one string, so write it in a single call; the
        # context manager closes the file even if writing fails.
        with open(rf"{vid_path}/{srt_file}", "w", encoding="utf-8") as f:
            f.write(srt.compose(subs))

    def write_txt(subs):
        """Write the plain text of ``subs``, one cue per line, to ``g_subtitles.txt``."""
        txt_file = out_file + ".txt"
        print("Writing text to: {}".format(txt_file))
        # Explicit UTF-8 so non-ASCII transcripts do not depend on the locale.
        with open(txt_file, "w", encoding="utf-8") as f:
            for s in subs:
                f.write(s.content.strip() + "\n")

    subs = long_running_recognize(storage_uri)
    print(subs)
    write_srt(subs)
    # del_blob("mnf_subtitle", filename2)