Conversion_Kitchen_Code/kitchen_counter/conversion/ppt_translation/translation.py

import time
import docx
import sys
from translation_resources import ibm_watson, google, aws, azure, lingvanex, yandex 
from script_detector import script_cat
from script_writing import default_script
from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5
from selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs
from tqdm import tqdm
import os
import string
from optimisation1 import all_translator
import argparse
import boto3
from botocore.exceptions import ClientError
from pptx import Presentation
from pptx.enum.lang import MSO_LANGUAGE_ID
from docx.shared import Inches, Cm, Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL
from docx2pdf import convert

from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE
from pptx.enum.shapes import MSO_SHAPE_TYPE

# Maps language codes (as given for the target language on the command line)
# to python-pptx MSO_LANGUAGE_ID members. translate_presentation() looks the
# target language code up here to set `font.language_id` on translated runs.
LANGUAGE_CODE_TO_LANGUAGE_ID = {
    'af': MSO_LANGUAGE_ID.AFRIKAANS,
    'am': MSO_LANGUAGE_ID.AMHARIC,
    'ar': MSO_LANGUAGE_ID.ARABIC,
    'bg': MSO_LANGUAGE_ID.BULGARIAN,
    'bn': MSO_LANGUAGE_ID.BENGALI,
    'bs': MSO_LANGUAGE_ID.BOSNIAN,
    'cs': MSO_LANGUAGE_ID.CZECH,
    'da': MSO_LANGUAGE_ID.DANISH,
    'de': MSO_LANGUAGE_ID.GERMAN,
    'el': MSO_LANGUAGE_ID.GREEK,
    'en': MSO_LANGUAGE_ID.ENGLISH_US,
    'es': MSO_LANGUAGE_ID.SPANISH,
    'et': MSO_LANGUAGE_ID.ESTONIAN,
    'fi': MSO_LANGUAGE_ID.FINNISH,
    'fr': MSO_LANGUAGE_ID.FRENCH,
    'fr-CA': MSO_LANGUAGE_ID.FRENCH_CANADIAN,
    'ha': MSO_LANGUAGE_ID.HAUSA,
    'he': MSO_LANGUAGE_ID.HEBREW,
    'hi': MSO_LANGUAGE_ID.HINDI,
    'hr': MSO_LANGUAGE_ID.CROATIAN,
    'hu': MSO_LANGUAGE_ID.HUNGARIAN,
    'id': MSO_LANGUAGE_ID.INDONESIAN,
    'it': MSO_LANGUAGE_ID.ITALIAN,
    'ja': MSO_LANGUAGE_ID.JAPANESE,
    'ka': MSO_LANGUAGE_ID.GEORGIAN,
    'ko': MSO_LANGUAGE_ID.KOREAN,
    'lv': MSO_LANGUAGE_ID.LATVIAN,
    'ms': MSO_LANGUAGE_ID.MALAYSIAN,
    'nl': MSO_LANGUAGE_ID.DUTCH,
    'no': MSO_LANGUAGE_ID.NORWEGIAN_BOKMOL,
    'pl': MSO_LANGUAGE_ID.POLISH,
    'ps': MSO_LANGUAGE_ID.PASHTO,
    # Plain 'pt' is tagged with the Brazilian Portuguese language ID.
    'pt': MSO_LANGUAGE_ID.BRAZILIAN_PORTUGUESE,
    'ro': MSO_LANGUAGE_ID.ROMANIAN,
    'ru': MSO_LANGUAGE_ID.RUSSIAN,
    'sk': MSO_LANGUAGE_ID.SLOVAK,
    'sl': MSO_LANGUAGE_ID.SLOVENIAN,
    'so': MSO_LANGUAGE_ID.SOMALI,
    'sq': MSO_LANGUAGE_ID.ALBANIAN,
    'sr': MSO_LANGUAGE_ID.SERBIAN_LATIN,
    'sv': MSO_LANGUAGE_ID.SWEDISH,
    'sw': MSO_LANGUAGE_ID.SWAHILI,
    'ta': MSO_LANGUAGE_ID.TAMIL,
    'th': MSO_LANGUAGE_ID.THAI,
    'tr': MSO_LANGUAGE_ID.TURKISH,
    'uk': MSO_LANGUAGE_ID.UKRAINIAN,
    'ur': MSO_LANGUAGE_ID.URDU,
    'vi': MSO_LANGUAGE_ID.VIETNAMESE,
    # NOTE(review): 'zh' is tagged as CHINESE_SINGAPORE and 'zh-TW' as
    # CHINESE_HONG_KONG_SAR -- confirm these are the intended regional variants.
    'zh': MSO_LANGUAGE_ID.CHINESE_SINGAPORE ,
    'zh-TW': MSO_LANGUAGE_ID.CHINESE_HONG_KONG_SAR,
}

# Name under which import_terminology() uploads the custom terminology;
# main() also places it in the terminology_names list it passes on.
TERMINOLOGY_NAME = 'pptx-translator-terminology'
# boto3 client for the 'translate' service, created when this module is
# imported; import_terminology() uses it.
translate = boto3.client(service_name='translate')

def add_dial_comparison_doc_ppt(doc, table, sentence, output):
    """Append one row to the PPT comparison table.

    A new row is added to ``table``; its first cell receives the original
    ``sentence`` and its second cell the translated ``output``.
    ``doc`` is not used by this function.
    """
    new_cells = table.add_row().cells
    for column, value in enumerate((sentence, output)):
        new_cells[column].text = value

def add_dial_comparison_doc_srt(doc, table, sentence, output):
    """Append one row to the three-column SRT comparison table.

    The first cell of the new row receives the original ``sentence``; the
    second and the third cell both receive the translated ``output``.
    ``doc`` is not used by this function.
    """
    new_cells = table.add_row().cells
    for column, value in enumerate((sentence, output, output)):
        new_cells[column].text = value

# Texts that carry nothing translatable; they are left exactly as they are.
_UNTRANSLATABLE_TEXTS = frozenset(["", " ", ",", " ,"])
# A translation that is a single ASCII punctuation mark is not logged.
_SINGLE_PUNCTUATION_MARKS = frozenset(string.punctuation)


def _translate_and_record(text, source_language_code, target_language_code, doc, table):
    """Translate *text* and log the (original, translation) pair in *table*.

    Returns the translated text, or ``None`` when the text has to be left
    untouched (nothing translatable, or rejected with a ValidationException).
    Any other ``ClientError`` is re-raised instead of being swallowed.
    """
    if text in _UNTRANSLATABLE_TEXTS:
        return None
    try:
        # all_translator is the translator this module imports and already
        # calls with (text, source, target) elsewhere in the file.
        output = all_translator(text, source_language_code, target_language_code)
    except ClientError as client_error:
        if client_error.response['Error']['Code'] != 'ValidationException':
            raise
        # Text not valid. Maybe the size of the text exceeds the size limit of the service.
        # Amazon Translate limits: https://docs.aws.amazon.com/translate/latest/dg/what-is-limits.html
        # We just ignore and don't translate the text.
        print('Invalid text. Ignoring...')
        return None
    if output not in _SINGLE_PUNCTUATION_MARKS:
        add_dial_comparison_doc_ppt(doc, table, text, output)
    return output


def _replace_frame_text(text_frame, translate_text, language_id):
    """Translate a text frame as one unit and tag its runs with *language_id*."""
    output = translate_text(text_frame.text)
    if output is None:
        return
    text_frame.text = output
    if language_id is None:
        return
    # Tag whatever runs exist after the assignment; an empty translation
    # leaves no run, so indexing runs[0] would not be safe here.
    for paragraph in text_frame.paragraphs:
        for run in paragraph.runs:
            run.font.language_id = language_id


def _translate_runs(text_frame, translate_text, language_id):
    """Translate a text frame run by run, replacing each run's text in place."""
    for paragraph in text_frame.paragraphs:
        for run in paragraph.runs:
            output = translate_text(run.text)
            if output is None:
                continue
            run.text = output
            if language_id is not None:
                run.font.language_id = language_id


def translate_presentation(presentation, source_language_code, target_language_code, terminology_names, doc, table):
    """Translate the text of every slide in *presentation* in place.

    For each slide, in this order:

    1. shapes that are direct children of a group shape (whole text frame);
    2. the speaker notes, when the slide has a notes text frame;
    3. top-level shapes with a text frame (run by run);
    4. cells of tables hosted by top-level shapes (whole cell text frame).

    Every translated pair is appended to the comparison ``table`` through
    ``add_dial_comparison_doc_ppt``, except translations that consist of a
    single punctuation mark.

    Args:
        presentation: python-pptx presentation; modified in place.
        source_language_code: language code of the existing text.
        target_language_code: language code to translate into. When it has an
            entry in ``LANGUAGE_CODE_TO_LANGUAGE_ID`` the translated runs are
            tagged with that language ID; otherwise the tag is left alone.
        terminology_names: not used by the current implementation.
        doc: report document, forwarded to ``add_dial_comparison_doc_ppt``.
        table: report table that receives one row per translated text.

    Raises:
        botocore.exceptions.ClientError: for any service error other than a
            ValidationException (those are reported and the text is skipped).

    The former picture branch was dropped: it relied on ``slide.has_images``
    and ``slide.images``, which python-pptx slides do not provide, and every
    top-level shape that has a text frame is already handled in step 3.
    """
    # .get(): a target code without a mapping must not abort the translation.
    language_id = LANGUAGE_CODE_TO_LANGUAGE_ID.get(target_language_code)

    def translate_text(text):
        return _translate_and_record(
            text, source_language_code, target_language_code, doc, table)

    number_of_slides = len(presentation.slides)
    for slide_number, slide in enumerate(presentation.slides, start=1):
        print('Aman Slide {slide_number} of {number_of_slides}'.format(
            slide_number=slide_number,
            number_of_slides=number_of_slides))

        # 1. Direct children of group shapes (nested groups are not descended).
        group_shapes = [
            shp for shp in slide.shapes
            if shp.shape_type == MSO_SHAPE_TYPE.GROUP]
        for group_shape in group_shapes:
            for shape in group_shape.shapes:
                if shape.has_text_frame:
                    _replace_frame_text(
                        shape.text_frame, translate_text, language_id)

        # 2. Speaker notes. Skipping untranslatable notes must not skip the
        #    rest of the slide, so no `continue` is used here.
        if slide.has_notes_slide:
            notes_frame = slide.notes_slide.notes_text_frame
            if notes_frame is not None:
                output = translate_text(notes_frame.text)
                if output is not None:
                    notes_frame.text = output

        # 3. Top-level shapes, one run at a time.
        for shape in slide.shapes:
            if shape.has_text_frame:
                _translate_runs(shape.text_frame, translate_text, language_id)

        # 4. Tables live inside shapes; `table` (the report table) is
        #    deliberately not rebound while walking them.
        for shape in slide.shapes:
            if not shape.has_table:
                continue
            for row in shape.table.rows:
                for cell in row.cells:
                    _replace_frame_text(
                        cell.text_frame, translate_text, language_id)
                            
                            
def import_terminology(terminology_file_path):
    """Upload a terminology CSV file through the module-level ``translate`` client.

    The file at ``terminology_file_path`` is read as bytes and imported under
    ``TERMINOLOGY_NAME`` with the ``OVERWRITE`` merge strategy.
    """
    print('Importing terminology data from {file_path}...'.format(file_path=terminology_file_path))
    with open(terminology_file_path, 'rb') as terminology_file:
        terminology_data = {
            'File': bytearray(terminology_file.read()),
            'Format': 'CSV',
        }
        translate.import_terminology(
            Name=TERMINOLOGY_NAME,
            MergeStrategy='OVERWRITE',
            TerminologyData=terminology_data,
        )

def punct_remover_w_o_digits(string):
    """Return *string* with every listed punctuation character removed.

    Only the characters in ``punctuations`` below are stripped; letters,
    digits, whitespace and any character not listed (for example ``+`` or
    ``=``) are kept unchanged.
    """
    # The backslash is escaped explicitly: the bare '\,' form is an invalid
    # escape sequence and triggers a warning on current Python versions.
    punctuations = '''!()-[]{};:'"\\,<>./?@#$%^&*_~…।'''
    # One translate() pass deletes all listed characters at once.
    return string.translate(str.maketrans('', '', punctuations))

def translated_srt(filename, source_lang, target_lang, doc_srt, table_srt):
    """Translate the paragraphs of an SRT-derived document and save the results.

    ``filename`` is used directly as a document object (its ``paragraphs``
    are iterated and ``save`` is called on it). A paragraph is skipped when,
    after punctuation removal, it is one of the placeholder strings or its
    first five characters are all digits. Every other paragraph is replaced
    by its translation and logged as a row of ``table_srt``.

    Finally the translated document and ``doc_srt`` are written to the
    working directory with ``target_lang`` in their file names.
    """
    skippable = ["", " ", ',', ' ,']
    srt_document = filename
    for paragraph in tqdm(srt_document.paragraphs):
        stripped = punct_remover_w_o_digits(paragraph.text)
        # Placeholder lines and lines that start with digits are left as-is.
        if stripped in skippable or stripped[:5].isdigit():
            continue
        original = paragraph.text
        translated = all_translator(original, source_lang, target_lang)
        paragraph.text = translated
        add_dial_comparison_doc_srt(doc_srt, table_srt, original, translated)

    srt_document.save("translated_srt "+target_lang+".docx")
    doc_srt.save("srt_table"+target_lang+".docx")

def _new_comparison_document(title, column_headers):
    """Create a report document holding a titled comparison table.

    All section margins are set to 0.2 inch, the width and height of the last
    section's page are swapped, ``title`` is added as a level-0 heading, and a
    'Table Grid' table with one bold header cell per entry of
    ``column_headers`` is appended.

    Returns:
        ``(document, table)``.
    """
    document = docx.Document()
    for section in document.sections:
        section.top_margin = Inches(0.2)
        section.bottom_margin = Inches(0.2)
        section.left_margin = Inches(0.2)
        section.right_margin = Inches(0.2)
    last_section = document.sections[-1]
    last_section.page_width, last_section.page_height = (
        last_section.page_height, last_section.page_width)
    document.add_heading(title, 0)
    document.add_paragraph()
    table = document.add_table(rows=1, cols=len(column_headers))
    table.style = 'Table Grid'
    for cell, header in zip(table.rows[0].cells, column_headers):
        cell.paragraphs[0].add_run(header).bold = True
    return document, table


def main():
    """Command-line entry point: translate a pptx file and an srt file.

    Steps:
        1. Parse the arguments and, when ``--terminology`` is given, import
           the terminology file.
        2. Translate the presentation and save it next to the input as
           ``<name>-<target_language_code><extension>``.
        3. Save the PPT comparison report as
           ``ppt_table<source_language_code>.docx`` and pass that same file
           to ``convert``.
        4. Copy the srt file line by line into ``converted_srt.docx``, then
           hand the reloaded document to ``translated_srt`` together with a
           three-column comparison report.
    """
    # The sentence is the description; passed positionally it would become
    # the program name shown in the usage line.
    argument_parser = argparse.ArgumentParser(
        description='Translates pptx files from source language to target language using Amazon Translate service')
    argument_parser.add_argument(
        'source_language_code', type=str,
        help='The language code for the language of the source text. Example: en')
    argument_parser.add_argument(
        'target_language_code', type=str,
        help='The language code requested for the language of the target text. Example: pt')
    argument_parser.add_argument(
        'input_file_path', type=str,
        help='The path of the pptx file that should be translated')
    argument_parser.add_argument(
        'input_srt_file', type=str,
        help='The path of the srt file that should be translated')
    argument_parser.add_argument(
        '--terminology', type=str,
        help='The path of the terminology CSV file')
    args = argument_parser.parse_args()

    terminology_names = []
    if args.terminology:
        import_terminology(args.terminology)
        terminology_names = [TERMINOLOGY_NAME]

    print('Translating {file_path} from {source_language_code} to {target_language_code}...'.format(
        file_path=args.input_file_path,
        source_language_code=args.source_language_code,
        target_language_code=args.target_language_code))
    presentation = Presentation(args.input_file_path)

    doc_ppt, table_ppt = _new_comparison_document(
        args.input_file_path,
        ("Original Sentence", "Translated Sentence"))

    translate_presentation(presentation,
                           args.source_language_code,
                           args.target_language_code,
                           terminology_names, doc_ppt, table_ppt)

    # Insert the language code before the extension only. A plain
    # str.replace('.pptx', ...) also rewrites directory names and returns the
    # input path itself (overwriting the source) for e.g. an upper-case extension.
    root, extension = os.path.splitext(args.input_file_path)
    output_file_path = '{root}-{language_code}{extension}'.format(
        root=root, language_code=args.target_language_code, extension=extension)
    print('Saving {output_file_path}...'.format(output_file_path=output_file_path))
    presentation.save(output_file_path)

    # Convert the report that was actually written, not a differently named file.
    ppt_table_path = "ppt_table" + args.source_language_code + ".docx"
    doc_ppt.save(ppt_table_path)
    convert(ppt_table_path)

    # Copy the subtitle file into a Word document, one paragraph per line.
    # 'utf-8-sig' reads UTF-8 with or without a byte-order mark.
    converted_srt = docx.Document()
    with open(args.input_srt_file, "r", encoding="utf-8-sig") as srt_file:
        for line in srt_file:
            converted_srt.add_paragraph(line.rstrip(), style='No Spacing')
    converted_srt.save("converted_srt.docx")

    # save() has returned, so the file can be reopened right away.
    srt_document = docx.Document("converted_srt.docx")

    doc_srt, table_srt = _new_comparison_document(
        args.input_srt_file,
        ("Original Sentence", "Translated Sentence", "LPP Corrected Sentence"))

    translated_srt(srt_document, args.source_language_code,
                   args.target_language_code, doc_srt, table_srt)

# Run the command-line workflow only when this file is executed as a script,
# not when it is imported.
if __name__== '__main__':
  main()
first commit 2024-04-27 09:33:09 +00:00			`import time`
			`import docx`
			`import sys`
			`from translation_resources import ibm_watson, google, aws, azure, lingvanex, yandex`
			`from script_detector import script_cat`
			`from script_writing import default_script`
			`from translation_metric import manual_diff_score, bleu_diff_score, gleu_diff_score, meteor_diff_score, rouge_diff_score, diff_score, critera4_5`
			`from selection_source import selection_source, function5, function41, function311, function221, function2111, function11111, selection_source_transliteration, two_sources_two_outputs`
			`from tqdm import tqdm`
			`import os`
			`import string`
			`from optimisation1 import all_translator`
			`import argparse`
			`import boto3`
			`from botocore.exceptions import ClientError`
			`from pptx import Presentation`
			`from pptx.enum.lang import MSO_LANGUAGE_ID`
			`from docx.shared import Inches, Cm, Pt`
			`from docx.enum.text import WD_ALIGN_PARAGRAPH`
			`from docx.enum.table import WD_TABLE_ALIGNMENT, WD_ALIGN_VERTICAL`
			`from docx2pdf import convert`

			`from pptx.enum.text import MSO_ANCHOR, MSO_AUTO_SIZE`
			`from pptx.enum.shapes import MSO_SHAPE_TYPE`

			`LANGUAGE_CODE_TO_LANGUAGE_ID = {`
			`'af': MSO_LANGUAGE_ID.AFRIKAANS,`
			`'am': MSO_LANGUAGE_ID.AMHARIC,`
			`'ar': MSO_LANGUAGE_ID.ARABIC,`
			`'bg': MSO_LANGUAGE_ID.BULGARIAN,`
			`'bn': MSO_LANGUAGE_ID.BENGALI,`
			`'bs': MSO_LANGUAGE_ID.BOSNIAN,`
			`'cs': MSO_LANGUAGE_ID.CZECH,`
			`'da': MSO_LANGUAGE_ID.DANISH,`
			`'de': MSO_LANGUAGE_ID.GERMAN,`
			`'el': MSO_LANGUAGE_ID.GREEK,`
			`'en': MSO_LANGUAGE_ID.ENGLISH_US,`
			`'es': MSO_LANGUAGE_ID.SPANISH,`
			`'et': MSO_LANGUAGE_ID.ESTONIAN,`
			`'fi': MSO_LANGUAGE_ID.FINNISH,`
			`'fr': MSO_LANGUAGE_ID.FRENCH,`
			`'fr-CA': MSO_LANGUAGE_ID.FRENCH_CANADIAN,`
			`'ha': MSO_LANGUAGE_ID.HAUSA,`
			`'he': MSO_LANGUAGE_ID.HEBREW,`
			`'hi': MSO_LANGUAGE_ID.HINDI,`
			`'hr': MSO_LANGUAGE_ID.CROATIAN,`
			`'hu': MSO_LANGUAGE_ID.HUNGARIAN,`
			`'id': MSO_LANGUAGE_ID.INDONESIAN,`
			`'it': MSO_LANGUAGE_ID.ITALIAN,`
			`'ja': MSO_LANGUAGE_ID.JAPANESE,`
			`'ka': MSO_LANGUAGE_ID.GEORGIAN,`
			`'ko': MSO_LANGUAGE_ID.KOREAN,`
			`'lv': MSO_LANGUAGE_ID.LATVIAN,`
			`'ms': MSO_LANGUAGE_ID.MALAYSIAN,`
			`'nl': MSO_LANGUAGE_ID.DUTCH,`
			`'no': MSO_LANGUAGE_ID.NORWEGIAN_BOKMOL,`
			`'pl': MSO_LANGUAGE_ID.POLISH,`
			`'ps': MSO_LANGUAGE_ID.PASHTO,`
			`'pt': MSO_LANGUAGE_ID.BRAZILIAN_PORTUGUESE,`
			`'ro': MSO_LANGUAGE_ID.ROMANIAN,`
			`'ru': MSO_LANGUAGE_ID.RUSSIAN,`
			`'sk': MSO_LANGUAGE_ID.SLOVAK,`
			`'sl': MSO_LANGUAGE_ID.SLOVENIAN,`
			`'so': MSO_LANGUAGE_ID.SOMALI,`
			`'sq': MSO_LANGUAGE_ID.ALBANIAN,`
			`'sr': MSO_LANGUAGE_ID.SERBIAN_LATIN,`
			`'sv': MSO_LANGUAGE_ID.SWEDISH,`
			`'sw': MSO_LANGUAGE_ID.SWAHILI,`
			`'ta': MSO_LANGUAGE_ID.TAMIL,`
			`'th': MSO_LANGUAGE_ID.THAI,`
			`'tr': MSO_LANGUAGE_ID.TURKISH,`
			`'uk': MSO_LANGUAGE_ID.UKRAINIAN,`
			`'ur': MSO_LANGUAGE_ID.URDU,`
			`'vi': MSO_LANGUAGE_ID.VIETNAMESE,`
			`'zh': MSO_LANGUAGE_ID.CHINESE_SINGAPORE ,`
			`'zh-TW': MSO_LANGUAGE_ID.CHINESE_HONG_KONG_SAR,`
			`}`

			`TERMINOLOGY_NAME = 'pptx-translator-terminology'`
			`translate = boto3.client(service_name='translate')`

			`def add_dial_comparison_doc_ppt(doc, table, sentence, output):`
			`row_Cells = table.add_row().cells`
			`row_Cells[0].text= sentence`
			`row_Cells[1].text= output`

			`def add_dial_comparison_doc_srt(doc, table, sentence, output):`
			`row_Cells = table.add_row().cells`
			`row_Cells[0].text= sentence`
			`row_Cells[1].text= output`
			`row_Cells[2].text= output`

			`def translate_presentation(presentation, source_language_code, target_language_code, terminology_names, doc, table):`
			`etc_list = ["", " ", ',', ' ,']`
			`slide_number = 1`
			`print('96this the line')`
			`for slide in presentation.slides:`
			`print('Aman Slide {slide_number} of {number_of_slides}'.format(`
			`slide_number=slide_number,`
			`number_of_slides=len(presentation.slides)))`
			`slide_number += 1`

			`group_shapes = [`
			`shp for shp in slide.shapes`
			`if shp.shape_type == MSO_SHAPE_TYPE.GROUP]`
			`print('106this the line')`
			`for group_shape in group_shapes:`
			`for shape in group_shape.shapes:`
			`if shape.has_text_frame:`
			`# print(shape.text)`

			`if shape.text in etc_list:`
			`continue`

			`sentence = shape.text`
			`output = translate_comparison(`
			`shape.text, source_language_code, target_language_code)`

			`#slide.notes_slide.notes_text_frame.text = output`
			`#shape.text = output`
			`shape.text_frame.text = output`
			`#shape.text_frame.auto_size = MSO_AUTO_SIZE.NONE`
			`#shape.text.auto_size = MSO_AUTO_SIZE.NONE`
			`#shape.text_frame.text.font.language_id = LANGUAGE_CODE_TO_LANGUAGE_ID[target_language_code]`
			`shape.text_frame.paragraphs[0].runs[0].font.language_id = LANGUAGE_CODE_TO_LANGUAGE_ID[target_language_code]`

			`if output not in list(string.punctuation):`
			`add_dial_comparison_doc_ppt(`
			`doc, table, sentence, output)`

			`# translate comments`
			`print('132this the line')`
			`if slide.has_notes_slide:`
			`text_frame = slide.notes_slide.notes_text_frame`
			`if len(text_frame.text) > 0:`
			`#print("text", text_frame.text)`
			`try:`
			`# response = translate.translate_text(`
			`# Text=text_frame.text,`
			`# SourceLanguageCode=source_language_code,`
			`# TargetLanguageCode=target_language_code,`
			`# TerminologyNames=terminology_names)`
			`# slide.notes_slide.notes_text_frame.text = response.get('TranslatedText')`
			`if text_frame.text in etc_list:`
			`continue`
			`#print("text", text_frame.text)`
			`sentence = text_frame.text`
			`output = translate_comparison(`
			`text_frame.text, source_language_code, target_language_code)`
			`slide.notes_slide.notes_text_frame.text = output`
			`if output not in list(string.punctuation):`
			`add_dial_comparison_doc_ppt(`
			`doc, table, sentence, output)`

			`except ClientError as client_error:`
			`if (client_error.response['Error']['Code'] == 'ValidationException'):`
			`# Text not valid. Maybe the size of the text exceeds the size limit of the service.`
			`# Amazon Translate limits: https://docs.aws.amazon.com/translate/latest/dg/what-is-limits.html`
			`# We just ignore and don't translate the text.`
			`print('Invalid text. Ignoring...')`
			`print('159this the line')`
			`for shape in slide.shapes:`
			`#print("shape", shape)`

			`if not shape.has_text_frame:`
			`continue`
			`# print("shape", shape.text_frame.text)`
			`# print("shape", shape.text_frame)`

			`for paragraph in shape.text_frame.paragraphs:`
			`for index, paragraph_run in enumerate(paragraph.runs):`
			`try:`
			`#print("text", paragraph_run.text)`
			`# response = translate.translate_text(`
			`# Text=paragraph_run.text,`
			`# SourceLanguageCode=source_language_code,`
			`# TargetLanguageCode=target_language_code,`
			`# TerminologyNames=terminology_names)`
			`# paragraph.runs[index].text = response.get('TranslatedText')`

			`if paragraph_run.text in etc_list:`
			`continue`
			`#print("paragraph", paragraph_run.text)`
			`sentence = paragraph_run.text`
			`output = translate_comparison(`
			`paragraph_run.text, source_language_code, target_language_code)`
			`paragraph.runs[index].text = output`

			`if output not in list(string.punctuation):`
			`add_dial_comparison_doc_ppt(`
			`doc, table, sentence, output)`

			`paragraph.runs[index].font.language_id = LANGUAGE_CODE_TO_LANGUAGE_ID[target_language_code]`
			`except ClientError as client_error:`
			`if (client_error.response['Error']['Code'] == 'ValidationException'):`
			`# Text not valid. Maybe the size of the text exceeds the size limit of the service.`
			`# Amazon Translate limits: https://docs.aws.amazon.com/translate/latest/dg/what-is-limits.html`
			`# We just ignore and don't translate the text.`
			`print('Invalid text. Ignoring...')`
			`print('B')`

			`print('200this the line ')`
			`if slide.has_table:`
			`print("table got")`
			`tables = slide.tables`
			`for table in tables:`
			`for row in table.rows:`
			`for cell in row.cells:`
			`print("cells got")`
			`# Check if the text frame is in a table and the text is not in the etc_list.`
			`if cell.text_frame is not None and cell.text_frame.text not in etc_list:`
			`# Get the text from the text frame.`
			`sentence = cell.text_frame.text`
			`print(sentence,"printing sentence")`
			`# Translate the text using the translate_comparison() function.`
			`output = translate_comparison(sentence, source_language_code, target_language_code)`
			`print(output,"printing output")`
			`# Set the text of the text frame to the translated text.`
			`cell.text_frame.text = output`

			`# If the translated text is not punctuation, add it to the document and table.`
			`if output not in list(string.punctuation):`
			`add_dial_comparison_doc_ppt(doc, table, sentence, output)`

			`# Set the language ID of the text frame to the target language ID.`
			`cell.text_frame.paragraphs[0].runs[0].font.language_id = LANGUAGE_CODE_TO_LANGUAGE_ID[target_language_code]`



			`if slide.has_images:`
			`images = slide.images`
			`for image in images:`
			`if image.has_text_frame:`
			`if image.text_frame.text in etc_list:`
			`continue`

			`sentence = image.text_frame.text`
			`output = all_translator(image.text_frame.text, source_language_code, target_language_code)`
			`image.text_frame.text = output`

			`if output not in list(string.punctuation):`
			`add_dial_comparison_doc_ppt(doc, table, sentence, output)`

			`print('A')`




			`def import_terminology(terminology_file_path):`
			`print('Importing terminology data from {file_path}...'.format(file_path=terminology_file_path))`
			`with open(terminology_file_path, 'rb') as f:`
			`translate.import_terminology(Name=TERMINOLOGY_NAME,`
			`MergeStrategy='OVERWRITE',`
			`TerminologyData={'File': bytearray(f.read()), 'Format': 'CSV'})`

			`def punct_remover_w_o_digits(string):`
			`punctuations = '''!()-[]{};:'"\,<>./?@#$%^&*_~…।'''`
			`for x in string.lower():`
			`if x in punctuations:`
			`string = string.replace(x, "")`
			`return string`

			`def translated_srt(filename, source_lang, target_lang, doc_srt, table_srt):`
			`#doc = docx.Document(filename)`
			`etc_list = ["", " ", ',', ' ,']`
			`doc = filename`
			`for para in tqdm(doc.paragraphs):`
			`text = punct_remover_w_o_digits(para.text)`
			`if text in etc_list:`
			`continue`
			`#print("text now", text)`
			`if text[:5].isdigit()==False:`
			`sentence = para.text`
			`output = all_translator(para.text, source_lang, target_lang)`
			`para.text = output`

			`add_dial_comparison_doc_srt(doc_srt, table_srt, sentence, output)`

			`doc.save("translated_srt "+target_lang+".docx")`
			`doc_srt.save("srt_table"+target_lang+".docx")`
			`#convert("srt_table"+target_lang+".docx")`

			`def main():`
			`argument_parser = argparse.ArgumentParser(`
			`'Translates pptx files from source language to target language using Amazon Translate service')`
			`argument_parser.add_argument(`
			`'source_language_code', type=str,`
			`help='The language code for the language of the source text. Example: en')`
			`argument_parser.add_argument(`
			`'target_language_code', type=str,`
			`help='The language code requested for the language of the target text. Example: pt')`
			`argument_parser.add_argument(`
			`'input_file_path', type=str,`
			`help='The path of the pptx file that should be translated')`
			`argument_parser.add_argument(`
			`'input_srt_file', type=str,`
			`help='The path of the srt file that should be translated')`

			`argument_parser.add_argument(`
			`'--terminology', type=str,`
			`help='The path of the terminology CSV file')`
			`args = argument_parser.parse_args()`

			`#print("srt", args.input_srt_file)`
			`terminology_names = []`
			`if args.terminology:`
			`import_terminology(args.terminology)`
			`terminology_names = [TERMINOLOGY_NAME]`

			`print('Translating {file_path} from {source_language_code} to {target_language_code}...'.format(`
			`file_path=args.input_file_path,`
			`source_language_code=args.source_language_code,`
			`target_language_code=args.target_language_code))`
			`presentation = Presentation(args.input_file_path)`

			`doc_ppt = docx.Document()`
			`sections = doc_ppt.sections`
			`for section in sections:`
			`section.top_margin = Inches(0.2)`
			`section.bottom_margin = Inches(0.2)`
			`section.left_margin = Inches(0.2)`
			`section.right_margin = Inches(0.2)`
			`section = doc_ppt.sections[-1]`
			`new_height = section.page_width`
			`section.page_width = section.page_height`
			`section.page_height = new_height`
			`name = args.input_file_path`
			`doc_ppt.add_heading(name, 0)`
			`doc_para = doc_ppt.add_paragraph()`
			`table_ppt = doc_ppt.add_table(rows=1,cols=2)`
			`table_ppt.style = 'Table Grid'`
			`hdr_Cells = table_ppt.rows[0].cells`
			`hdr_Cells[0].paragraphs[0].add_run("Original Sentence").bold=True`
			`hdr_Cells[1].paragraphs[0].add_run("Translated Sentence").bold=True`

			`translate_presentation(presentation,`
			`args.source_language_code,`
			`args.target_language_code,`
			`terminology_names, doc_ppt, table_ppt)`

			`output_file_path = args.input_file_path.replace(`
			`'.pptx', '-{language_code}.pptx'.format(language_code=args.target_language_code))`
			`print('Saving {output_file_path}...'.format(output_file_path=output_file_path))`
			`presentation.save(output_file_path)`
			`doc_ppt.save("ppt_table"+ args.source_language_code + ".docx")`
			`convert("ppt_table.docx")`

			`doc = docx.Document()`
			`file = args.input_srt_file`
			`file = open( file, "r")`
			`lines = file.readlines()`
			`file.close()`
			`lines = (line.rstrip() for line in lines)`
			`for line in lines:`
			`doc.add_paragraph(line, style = 'No Spacing')`
			`doc.save("converted_srt.docx")`

			`time.sleep(10)`
			`print("after sleep")`
			`file = docx.Document("converted_srt.docx")`

			`doc_srt = docx.Document()`
			`sections = doc_srt.sections`
			`for section in sections:`
			`section.top_margin = Inches(0.2)`
			`section.bottom_margin = Inches(0.2)`
			`section.left_margin = Inches(0.2)`
			`section.right_margin = Inches(0.2)`
			`section = doc_srt.sections[-1]`
			`new_height = section.page_width`
			`section.page_width = section.page_height`
			`section.page_height = new_height`
			`name = args.input_srt_file`
			`doc_srt.add_heading(name, 0)`
			`doc_para = doc_srt.add_paragraph()`
			`table_srt = doc_srt.add_table(rows=1,cols=3)`
			`table_srt.style = 'Table Grid'`
			`hdr_Cells = table_srt.rows[0].cells`
			`hdr_Cells[0].paragraphs[0].add_run("Original Sentence").bold=True`
			`hdr_Cells[1].paragraphs[0].add_run("Translated Sentence").bold=True`
			`hdr_Cells[2].paragraphs[0].add_run("LPP Corrected Sentence").bold=True`

			`translated_srt(file, args.source_language_code,`
			`args.target_language_code, doc_srt, table_srt)`

			`if __name__== '__main__':`
			`main()`