180 lines
6.2 KiB
Python
180 lines
6.2 KiB
Python
|
import time
|
||
|
import boto3
|
||
|
import sys
|
||
|
import uuid
|
||
|
import requests
|
||
|
import json
|
||
|
import urllib
|
||
|
import moviepy.editor as mp
|
||
|
import sys
|
||
|
import os
|
||
|
import random
|
||
|
from MNF.settings import BasePath
|
||
|
|
||
|
# Project root as reported by the settings helper imported from MNF.settings.
# It is used as a string prefix when building file paths in this module.
basePath = BasePath()

# Location of the subtitling conversion folder underneath the project root.
# NOTE(review): not referenced by the functions visible in this file; it is
# presumably consumed by other modules - confirm before removing.
aws_json_path = f"{basePath}/conversion/subtitling"
|
||
|
|
||
|
# basepath = "/home/user/mnf/project/MNF/conversion/subtitling"
|
||
|
|
||
|
|
||
|
# filename2 = sys.argv[1]
|
||
|
# lang_code = sys.argv[2]
|
||
|
# movie_name = sys.argv[3]
|
||
|
# lang_code = sys.argv[1]
|
||
|
# current = basepath + "/" + movie_name
|
||
|
|
||
|
# filename1 = os.path.splitext(filename2)[0]
|
||
|
|
||
|
# temp = basepath+"/"+filename2
|
||
|
|
||
|
|
||
|
def transcribe_file(job_name, file_uri, transcribe_client, lang_code, vid_path,
                    max_tries=60, poll_interval=10):
    """Run a transcription job and save its transcript as ``output.json``.

    Starts a transcription job for the WAV file at ``file_uri``, then polls
    the job status until it is ``COMPLETED`` or ``FAILED`` or until
    ``max_tries`` status checks have been made. On completion the transcript
    JSON is downloaded and written to ``<vid_path>/output.json``.

    Args:
        job_name: Name passed to the service as ``TranscriptionJobName``.
        file_uri: URI of the media file, passed as ``MediaFileUri``.
        transcribe_client: Object exposing ``start_transcription_job`` and
            ``get_transcription_job`` (e.g. a boto3 "transcribe" client).
        lang_code: Language code; converted with ``str()`` before sending.
        vid_path: Directory in which ``output.json`` is written.
        max_tries: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep between two status checks.

    Returns:
        None. The outcome is reported on stdout; a failed or timed-out job
        leaves no ``output.json`` behind.

    Raises:
        requests.HTTPError: If downloading the transcript returns an HTTP
            error status (previously the error body was saved as if it were
            the transcript).
    """
    transcribe_client.start_transcription_job(
        TranscriptionJobName=job_name,
        Media={"MediaFileUri": file_uri},
        MediaFormat="wav",
        LanguageCode=str(lang_code),
    )

    for _ in range(max_tries):
        job = transcribe_client.get_transcription_job(
            TranscriptionJobName=job_name)
        details = job["TranscriptionJob"]
        job_status = details["TranscriptionJobStatus"]

        if job_status == "COMPLETED":
            transcript_uri = details["Transcript"]["TranscriptFileUri"]
            print(transcript_uri)

            # A timeout keeps a stalled download from hanging the caller
            # forever; raise_for_status avoids storing an error page.
            reply = requests.get(transcript_uri, allow_redirects=True, timeout=60)
            reply.raise_for_status()

            print("Output.json file path: ", vid_path)
            # The context manager guarantees the file is flushed and closed.
            with open(rf"{vid_path}/output.json", "wb") as out_file:
                out_file.write(reply.content)

            print(job)
            print(f"Download the transcript from\n\t{transcript_uri}.")
            return

        if job_status == "FAILED":
            # Surface the failure instead of returning silently.
            print(f"Job {job_name} is {job_status}: {details.get('FailureReason')}")
            return

        print(f"Waiting for {job_name}. Current status is {job_status}")
        time.sleep(poll_interval)

    print(f"Gave up waiting for {job_name} after {max_tries} status checks")
    return
|
||
|
|
||
|
|
||
|
""" transcribe = boto3.client('transcribe')
|
||
|
while True:
|
||
|
status = transcribe.get_transcription_job(
|
||
|
TranscriptionJobName=job_name)
|
||
|
if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
|
||
|
#end = datetime.now()
|
||
|
break
|
||
|
print("Not ready yet...")
|
||
|
time.sleep(5)
|
||
|
print("processing time is {end - start}")
|
||
|
print(
|
||
|
"transcript URL is {status['TranscriptionJob']['Transcript']['TranscriptFileUri']}")
|
||
|
"""
|
||
|
|
||
|
|
||
|
def aws_sub(wav_file, lang_code, vid_path):
    """Upload a WAV file to S3 and transcribe it into ``output.json``.

    Reads AWS credentials from ``<basePath>/MNF/json_keys/keys_aws.json``,
    uploads ``<vid_path>/<wav_file>`` to the S3 bucket under the key
    ``wav_file``, then calls ``transcribe_file`` to run the transcription
    job.

    Args:
        wav_file: File name of the WAV audio inside ``vid_path``; it is also
            used as the S3 object key.
        lang_code: Language code forwarded to ``transcribe_file``.
        vid_path: Directory holding the WAV file; also forwarded to
            ``transcribe_file`` as the output directory.

    Returns:
        None.
    """
    BUCKET = "nishant1234"
    # The original code used the literal "ap-south-1" both in the bucket URL
    # and for the Transcribe client; it is named once here.
    REGION = "ap-south-1"

    stem = os.path.splitext(wav_file)[0]
    wav_path = f"{vid_path}/{wav_file}"

    with open(rf"{basePath}/MNF/json_keys/keys_aws.json") as f:
        keys1 = json.load(f)

    session = boto3.Session(
        aws_access_key_id=keys1["aws_access_key_id"],
        aws_secret_access_key=keys1["aws_secret_access_key"],
        region_name=keys1["region_name"],
    )

    s3 = session.resource("s3")
    s3.Bucket(BUCKET).upload_file(wav_path, wav_file)
    print("Upload successful")

    # NOTE(review): fixed pause kept from the original implementation; it is
    # presumably a safety margin before the object is read - confirm whether
    # it is still required.
    time.sleep(30)

    transcribe_client = session.client("transcribe", region_name=REGION)

    # Point at the exact key that was uploaded. Rebuilding it as
    # "<stem>.wav" broke for names whose extension is not literally ".wav".
    file_uri = f"https://{BUCKET}.s3.{REGION}.amazonaws.com/{wav_file}"

    # A UUID suffix replaces randint(0, 1000), which could repeat a job name
    # when the same file is processed again.
    job = f"{stem}-job-{uuid.uuid4().hex}"

    transcribe_file(job, file_uri, transcribe_client, lang_code, vid_path)
    # NOTE: the uploaded object is left in the bucket; nothing deletes it here.
|
||
|
|
||
|
# def main():
|
||
|
|
||
|
# if __name__ == '__main__':
|
||
|
# main()
|