"""Upload a WAV file to S3, transcribe it with Amazon Transcribe, and save
the resulting transcript JSON next to the source video.
"""
import json
import os
import random
import sys
import time
import urllib
import uuid

import boto3
import moviepy.editor as mp
import requests

from MNF.settings import BasePath

basePath = BasePath()
aws_json_path = basePath + "/conversion/subtitling"

# S3 bucket the audio is uploaded to, and the region used for both the
# Transcribe client and the object URL handed to Transcribe.
_BUCKET = "nishant1234"
_REGION = "ap-south-1"

# Seconds to wait for the transcript download before giving up.
_DOWNLOAD_TIMEOUT = 60


def transcribe_file(
    job_name,
    file_uri,
    transcribe_client,
    lang_code,
    vid_path,
    max_tries=60,
    poll_interval=10,
):
    """Run a transcription job and save its transcript as ``output.json``.

    Starts a job on ``transcribe_client`` for the WAV media at ``file_uri``,
    then polls the job until it reports COMPLETED or FAILED, or until
    ``max_tries`` polls have been made. On COMPLETED the transcript is
    downloaded and written to ``{vid_path}/output.json``.

    Args:
        job_name: Name to give the transcription job.
        file_uri: URI of the media file passed as ``MediaFileUri``.
        transcribe_client: Client exposing ``start_transcription_job`` and
            ``get_transcription_job`` (e.g. a boto3 "transcribe" client).
        lang_code: Language code; converted with ``str()`` before use.
        vid_path: Directory in which ``output.json`` is written.
        max_tries: Maximum number of status polls (default 60).
        poll_interval: Seconds to sleep between polls (default 10).

    Returns:
        None.

    Raises:
        requests.HTTPError: If downloading the transcript returns an HTTP
            error status.
    """
    transcribe_client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={"MediaFileUri": file_uri},
        MediaFormat="wav",
        LanguageCode=str(lang_code),
    )

    for _ in range(max_tries):
        job = transcribe_client.get_transcription_job(
            TranscriptionJobName=job_name
        )
        job_status = job["TranscriptionJob"]["TranscriptionJobStatus"]

        if job_status == "COMPLETED":
            transcript_uri = job["TranscriptionJob"]["Transcript"][
                "TranscriptFileUri"
            ]
            print(transcript_uri)
            r = requests.get(
                transcript_uri,
                allow_redirects=True,
                timeout=_DOWNLOAD_TIMEOUT,
            )
            # Fail loudly rather than saving an error page as the transcript.
            r.raise_for_status()
            print("Output.json file path: ", vid_path)
            with open(rf"{vid_path}/output.json", "wb") as out_file:
                out_file.write(r.content)
            print(job)
            print(f"Download the transcript from\n\t{transcript_uri}.")
            break

        if job_status == "FAILED":
            reason = job["TranscriptionJob"].get("FailureReason", "unknown")
            print(f"Job {job_name} FAILED: {reason}")
            break

        print(f"Waiting for {job_name}. Current status is {job_status}")
        time.sleep(poll_interval)
    else:
        # Loop ended without reaching a terminal status.
        print(f"Gave up waiting for {job_name} after {max_tries} checks.")

    return


def aws_sub(wav_file, lang_code, vid_path):
    """Upload ``wav_file`` to S3 and transcribe it with Amazon Transcribe.

    Credentials and the session region are read from
    ``{basePath}/MNF/json_keys/keys_aws.json``. The file at
    ``{vid_path}/{wav_file}`` is uploaded to the bucket under the key
    ``wav_file``, and ``transcribe_file`` is then called, which writes the
    transcript to ``{vid_path}/output.json`` when the job completes.

    Args:
        wav_file: File name of the WAV audio inside ``vid_path``.
        lang_code: Language code forwarded to the transcription job.
        vid_path: Directory holding ``wav_file``; also the output directory.

    Returns:
        None.
    """
    wav_stem = os.path.splitext(wav_file)[0]
    wav_path = vid_path + "/" + wav_file

    with open(rf"{basePath}/MNF/json_keys/keys_aws.json") as f:
        keys = json.load(f)

    session = boto3.Session(
        aws_access_key_id=keys["aws_access_key_id"],
        aws_secret_access_key=keys["aws_secret_access_key"],
        region_name=keys["region_name"],
    )

    s3 = session.resource("s3")
    s3.Bucket(_BUCKET).upload_file(wav_path, wav_file)
    print("Upload successful")
    # Delay kept from the original implementation.
    # NOTE(review): upload_file appears to block until the upload finishes,
    # so this wait may be unnecessary - confirm before removing.
    time.sleep(30)

    transcribe_client = session.client("transcribe", region_name=_REGION)

    # Point at the key that was actually uploaded, not a rebuilt
    # "<stem>.wav", so the two cannot drift apart.
    file_uri = f"https://{_BUCKET}.s3.{_REGION}.amazonaws.com/{wav_file}"

    # A uuid suffix avoids the frequent name collisions a 0-1000 random
    # number produces when the same file is processed more than once.
    job = f"{wav_stem}-job-{uuid.uuid4().hex[:12]}"

    transcribe_file(job, file_uri, transcribe_client, lang_code, vid_path)
    # NOTE: the uploaded object is not deleted from the bucket afterwards.