Conversion_Kitchen_Code/kitchen_counter/conversion/subtitling/AWS_S3.py

180 lines
6.2 KiB
Python
Executable File

import time
import boto3
import sys
import uuid
import requests
import json
import urllib
import moviepy.editor as mp
import sys
import os
import random
from MNF.settings import BasePath
# Base directory obtained from the project's settings helper; every path in
# this module is built by string concatenation on top of it.
# NOTE(review): presumably the project root as a str -- confirm in MNF.settings.
basePath = BasePath()
# Location of the subtitling package underneath the base directory.
aws_json_path = basePath + "/conversion/subtitling"
# basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
# filename2 = sys.argv[1]
# lang_code = sys.argv[2]
# movie_name = sys.argv[3]
# lang_code = sys.argv[1]
# current = basepath + "/" + movie_name
# filename1 = os.path.splitext(filename2)[0]
# temp = basepath+"/"+filename2
def transcribe_file(
    job_name,
    file_uri,
    transcribe_client,
    lang_code,
    vid_path,
    max_tries=60,
    poll_interval=10,
):
    """Run an Amazon Transcribe job and save its transcript as ``output.json``.

    The job is started, then polled until it reaches a terminal state or the
    polling budget is exhausted. On success the transcript JSON is downloaded
    and written to ``<vid_path>/output.json``.

    Args:
        job_name: Name for the transcription job.
        file_uri: Location of the media file handed to Transcribe.
        transcribe_client: Client object exposing ``start_transcription_job``
            and ``get_transcription_job``.
        lang_code: Language code of the audio; converted with ``str()``.
        vid_path: Directory that receives ``output.json``.
        max_tries: Maximum number of status checks before giving up.
        poll_interval: Seconds to wait between status checks.

    Returns:
        None. Progress, failure and time-out are reported on stdout.

    Raises:
        requests.RequestException: If downloading the transcript fails or the
            server answers with an HTTP error status.
    """
    transcribe_client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={"MediaFileUri": file_uri},
        MediaFormat="wav",
        LanguageCode=str(lang_code),
    )

    for _ in range(max_tries):
        job = transcribe_client.get_transcription_job(
            TranscriptionJobName=job_name
        )
        details = job["TranscriptionJob"]
        job_status = details["TranscriptionJobStatus"]

        if job_status == "COMPLETED":
            transcript_uri = details["Transcript"]["TranscriptFileUri"]
            print(f"Download the transcript from\n\t{transcript_uri}.")
            reply = requests.get(
                transcript_uri, allow_redirects=True, timeout=60
            )
            # Do not persist an HTTP error page as if it were the transcript.
            reply.raise_for_status()
            print("Output.json file path: ", vid_path)
            with open(rf"{vid_path}/output.json", "wb") as out_file:
                out_file.write(reply.content)
            return

        if job_status == "FAILED":
            reason = details.get("FailureReason", "no reason reported")
            print(f"Job {job_name} is {job_status}: {reason}")
            return

        print(f"Waiting for {job_name}. Current status is {job_status}")
        time.sleep(poll_interval)

    # The loop ran out of attempts without seeing a terminal status.
    print(f"Gave up waiting for {job_name} after {max_tries} status checks.")
    return
""" transcribe = boto3.client('transcribe')
while True:
status = transcribe.get_transcription_job(
TranscriptionJobName=job_name)
if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
#end = datetime.now()
break
print("Not ready yet...")
time.sleep(5)
print("processing time is {end - start}")
print(
"transcript URL is {status['TranscriptionJob']['Transcript']['TranscriptFileUri']}")
"""
def _transcribe_job_name(stem):
    """Return a unique, Transcribe-safe job name derived from a file stem."""
    # Job names may only contain ASCII letters, digits, '.', '_' and '-',
    # so every other character is replaced with an underscore.
    safe_stem = "".join(
        ch if (ch.isascii() and ch.isalnum()) or ch in "._-" else "_"
        for ch in stem
    )
    # A UUID suffix avoids the collisions a 0-1000 random number invites;
    # the stem is truncated to stay within the 200-character name limit.
    return f"{safe_stem[:150]}-job-{uuid.uuid4().hex}"


def aws_sub(wav_file, lang_code, vid_path):
    """Upload a WAV file to S3 and transcribe it with Amazon Transcribe.

    AWS credentials and the session region are read from
    ``<basePath>/MNF/json_keys/keys_aws.json`` (keys ``aws_access_key_id``,
    ``aws_secret_access_key`` and ``region_name``). The file is uploaded to
    the bucket under the key ``wav_file`` and the transcription is delegated
    to ``transcribe_file``, which writes ``<vid_path>/output.json``.

    Args:
        wav_file: File name of the audio inside ``vid_path``; also used as
            the S3 object key.
        lang_code: Language code forwarded to the transcription job.
        vid_path: Directory containing ``wav_file`` and receiving the
            transcript.

    Returns:
        None.
    """
    BUCKET = "nishant1234"
    wav_path = os.path.join(vid_path, wav_file)

    with open(rf"{basePath}/MNF/json_keys/keys_aws.json") as f:
        keys1 = json.load(f)
    session = boto3.Session(
        aws_access_key_id=keys1["aws_access_key_id"],
        aws_secret_access_key=keys1["aws_secret_access_key"],
        region_name=keys1["region_name"],
    )

    # upload_file returns only after the object is stored, so no extra
    # waiting is needed before the object can be read by Transcribe.
    s3 = session.resource("s3")
    s3.Bucket(BUCKET).upload_file(wav_path, wav_file)
    print("Upload successful")

    transcribe_client = session.client("transcribe", region_name="ap-south-1")
    # Point at exactly the key that was uploaded instead of rebuilding a
    # "<stem>.wav" URL with a second hard-coded copy of the bucket name.
    file_uri = f"s3://{BUCKET}/{wav_file}"
    job = _transcribe_job_name(os.path.splitext(wav_file)[0])

    transcribe_file(job, file_uri, transcribe_client, lang_code, vid_path)
# s3.delete_object(Bucket=BUCKET, Key=wav_file)
# transcribe_file(job, file_uri, transcribe_client)
# def main():
# if __name__ == '__main__':
# main()