Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/AWS_S3.py

180 lines
6.2 KiB
Python
Raw Normal View History

2024-04-27 09:33:09 +00:00
import time
import boto3
import sys
import uuid
import requests
import json
import urllib
import moviepy.editor as mp
import sys
import os
import random
from MNF.settings import BasePath
# Base directory obtained from the project's settings helper. The string
# concatenation below (and the f-string in aws_sub) imply it is str-like.
# NOTE(review): presumably the project root without a trailing slash - confirm.
basePath = BasePath()
# Path of the subtitling package under the base directory.
# NOTE(review): not referenced anywhere else in the visible part of this file.
aws_json_path = basePath + "/conversion/subtitling"
# basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
# filename2 = sys.argv[1]
# lang_code = sys.argv[2]
# movie_name = sys.argv[3]
# lang_code = sys.argv[1]
# current = basepath + "/" + movie_name
# filename1 = os.path.splitext(filename2)[0]
# temp = basepath+"/"+filename2
def transcribe_file(job_name, file_uri, transcribe_client, lang_code, vid_path,
                    max_tries=60, poll_interval=10):
    """Run an Amazon Transcribe job and save its transcript as ``output.json``.

    Starts a transcription job for ``file_uri`` (media format is fixed to
    ``wav``), then polls the job status until it is ``COMPLETED`` or
    ``FAILED`` or the poll budget is used up. On completion the transcript
    JSON is downloaded and written to ``<vid_path>/output.json``.

    Args:
        job_name: Name for the transcription job.
        file_uri: URI of the uploaded media file, passed as ``MediaFileUri``.
        transcribe_client: Client object exposing ``start_transcription_job``
            and ``get_transcription_job`` (e.g. a boto3 Transcribe client).
        lang_code: Language code; converted with ``str()`` before use.
        vid_path: Directory in which ``output.json`` is written.
        max_tries: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep between status checks.

    Returns:
        None. Progress and outcome are reported on stdout only; when the job
        fails, times out, or the download is rejected, no file is written.
    """
    transcribe_client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={"MediaFileUri": file_uri},
        MediaFormat="wav",
        LanguageCode=str(lang_code),
    )

    for _ in range(max_tries):
        job = transcribe_client.get_transcription_job(
            TranscriptionJobName=job_name)
        job_status = job["TranscriptionJob"]["TranscriptionJobStatus"]

        if job_status == "COMPLETED":
            transcript_uri = job["TranscriptionJob"]["Transcript"]["TranscriptFileUri"]
            print(transcript_uri)
            # A timeout keeps a stalled download from blocking the caller forever.
            r = requests.get(transcript_uri, allow_redirects=True, timeout=60)
            if not r.ok:
                # Do not store an HTTP error page as if it were the transcript.
                print(
                    f"Could not download the transcript for {job_name}: "
                    f"HTTP {r.status_code}"
                )
                return
            print("Output.json file path: ", vid_path)
            # Context manager guarantees the file is flushed and closed.
            with open(rf"{vid_path}/output.json", "wb") as out_file:
                out_file.write(r.content)
            print(job)
            print(f"Download the transcript from\n\t{transcript_uri}.")
            return

        if job_status == "FAILED":
            # Report the failure instead of returning silently.
            reason = job["TranscriptionJob"].get("FailureReason", "unknown reason")
            print(f"Job {job_name} is {job_status}: {reason}")
            return

        print(f"Waiting for {job_name}. Current status is {job_status}")
        time.sleep(poll_interval)

    print(f"Gave up waiting for {job_name} after {max_tries} status checks")
""" transcribe = boto3.client('transcribe')
while True:
status = transcribe.get_transcription_job(
TranscriptionJobName=job_name)
if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
#end = datetime.now()
break
print("Not ready yet...")
time.sleep(5)
print("processing time is {end - start}")
print(
"transcript URL is {status['TranscriptionJob']['Transcript']['TranscriptFileUri']}")
"""
def aws_sub(wav_file, lang_code, vid_path):
    """Upload a WAV file to S3 and transcribe it with Amazon Transcribe.

    Reads AWS credentials from ``<basePath>/MNF/json_keys/keys_aws.json``,
    uploads ``<vid_path>/<wav_file>`` to the S3 bucket under the key
    ``wav_file``, then delegates to ``transcribe_file``, which writes the
    transcript to ``<vid_path>/output.json``.

    Args:
        wav_file: File name of the audio file inside ``vid_path``; also used
            as the S3 object key.
        lang_code: Language code forwarded to the transcription job.
        vid_path: Directory containing ``wav_file``; receives ``output.json``.

    Returns:
        None.
    """
    stem = os.path.splitext(wav_file)[0]
    wav_path = vid_path + "/" + wav_file

    with open(rf"{basePath}/MNF/json_keys/keys_aws.json") as f:
        keys1 = json.load(f)
    session = boto3.Session(
        aws_access_key_id=keys1["aws_access_key_id"],
        aws_secret_access_key=keys1["aws_secret_access_key"],
        region_name=keys1["region_name"],
    )

    bucket_name = "nishant1234"
    s3 = session.resource("s3")
    s3.Bucket(bucket_name).upload_file(wav_path, wav_file)
    print("Upload successful")
    # NOTE(review): fixed pause kept from the original implementation.
    # upload_file has already returned at this point, so the wait is probably
    # unnecessary - confirm before removing.
    time.sleep(30)

    transcribe_client = session.client("transcribe", region_name="ap-south-1")
    # Point at the key that was actually uploaded instead of re-deriving it as
    # stem + ".wav", which breaks for names such as "clip.WAV".
    file_uri = f"https://{bucket_name}.s3.ap-south-1.amazonaws.com/{wav_file}"

    # Transcribe job names must be unique and may only contain letters,
    # digits, ".", "_" and "-". Replace anything else and add a UUID suffix,
    # because a random number in 0..1000 collides quickly on repeated runs.
    safe_stem = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "._-" else "_"
        for ch in stem
    )[:150]
    job = f"{safe_stem}-job-{uuid.uuid4().hex}"

    transcribe_file(job, file_uri, transcribe_client, lang_code, vid_path)
    # TODO(review): the uploaded object is never removed from the bucket.
# def main():
# if __name__ == '__main__':
# main()