Conversion_Kitchen_Code/kitchen_counter/conversion/booktranslator/newConvertBook_name.py

# For extraction of text and images from PDF
import logging
import os.path
from adobe.pdfservices.operation.auth.credentials import Credentials
from adobe.pdfservices.operation.exception.exceptions import ServiceApiException, ServiceUsageException, SdkException
from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_pdf_options import ExtractPDFOptions
from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_renditions_element_type import \
    ExtractRenditionsElementType
from adobe.pdfservices.operation.pdfops.options.extractpdf.extract_element_type import ExtractElementType
from adobe.pdfservices.operation.execution_context import ExecutionContext
from adobe.pdfservices.operation.io.file_ref import FileRef
from adobe.pdfservices.operation.pdfops.extract_pdf_operation import ExtractPDFOperation
# For zip extraction
from zipfile import ZipFile
# For parsing JSON
import json
# For adding tables in docx
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx import Document
import pandas as pd
from docx.shared import Mm, Inches, Cm, Pt
from docx.oxml.shared import OxmlElement
from docx.oxml.ns import qn
api_creds = "/home/user/mnf/project/MNF/conversion/booktranslator/api_creds/pdfservices-api-credentials.json"
logging.basicConfig(level=os.environ.get("LOGLEVEL", "INFO"))
def set_cell_margins(cell, **kwargs):
    """Set table-cell margins (values in dxa, i.e. twentieths of a point) on a python-docx cell."""
    tc = cell._tc
    tcPr = tc.get_or_add_tcPr()
    tcMar = OxmlElement('w:tcMar')
    for m in ["top", "start", "bottom", "end"]:
        if m in kwargs:
            node = OxmlElement("w:{}".format(m))
            node.set(qn('w:w'), str(kwargs.get(m)))
            node.set(qn('w:type'), 'dxa')
            tcMar.append(node)
    tcPr.append(tcMar)
def add_table_to_doc(doc, df):
    """Append a pandas DataFrame to a python-docx Document as a bordered table."""
    columns = list(df.columns)
    table = doc.add_table(rows=1, cols=len(columns), style="Table Grid")
    table.autofit = True
    # Header row
    for col in range(len(columns)):
        set_cell_margins(table.cell(0, col), top=100, start=100, bottom=100, end=50)
        table.cell(0, col).text = columns[col].replace(" _x000D_", "").capitalize()
    # Data rows
    for row in df.itertuples():
        table_row = table.add_row().cells
        for col in range(len(columns)):
            set_cell_margins(table_row[col], top=100, start=100, bottom=100, end=50)
            table_row[col].text = str(row[col + 1]).replace(" _x000D_", "")
    return doc
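# Example usage (a minimal sketch; the xlsx path and output name below are hypothetical
# and follow the layout unpacked by zip_extractor()):
# demo_doc = Document()
# demo_df = pd.read_excel("contents/tables/fileoutpart0.xlsx")
# add_table_to_doc(demo_doc, demo_df)
# demo_doc.save("table_preview.docx")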
def pdf_text_images_extractor(api_creds, inputFile, outputzip):
    """Run the Adobe PDF Services Extract operation on inputFile and save the result zip to outputzip."""
    try:
        # Get base path.
        base_path = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
        # Initial setup, create credentials instance.
        credentials = Credentials.service_account_credentials_builder() \
            .from_file(api_creds) \
            .build()
        # Create an ExecutionContext using credentials and create a new operation instance.
        execution_context = ExecutionContext.create(credentials)
        extract_pdf_operation = ExtractPDFOperation.create_new()
        # Set operation input from a source file.
        source = FileRef.create_from_local_file(inputFile)
        extract_pdf_operation.set_input(source)
        # Build ExtractPDF options and set them into the operation.
        extract_pdf_options: ExtractPDFOptions = ExtractPDFOptions.builder() \
            .with_elements_to_extract([ExtractElementType.TEXT, ExtractElementType.TABLES]) \
            .with_elements_to_extract_renditions([ExtractRenditionsElementType.TABLES,
                                                  ExtractRenditionsElementType.FIGURES]) \
            .build()
        extract_pdf_operation.set_options(extract_pdf_options)
        # Execute the operation and save the resulting zip to the requested location.
        result: FileRef = extract_pdf_operation.execute(execution_context)
        result.save_as(outputzip)
    except (ServiceApiException, ServiceUsageException, SdkException):
        logging.exception("Exception encountered while executing operation")
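# Example usage (a sketch; the file names are hypothetical):
# pdf_text_images_extractor(api_creds, "/tmp/sample_book.pdf", "/tmp/sample_book.zip")
# The saved zip contains structuredData.json plus table/figure renditions, which
# zip_extractor() below unpacks for json_parser() and word_creator().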
def zip_extractor(filename):
    """Unpack the Extract API result zip into the "contents" directory."""
    with ZipFile(filename, 'r') as zipObj:
        # Extract all contents of the zip file into the "contents" directory
        zipObj.extractall("contents")
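# Assumed layout after extraction (based on the paths used elsewhere in this module):
#   contents/structuredData.json       - element-level JSON consumed by json_parser()
#   contents/tables/fileoutpart*.xlsx  - table renditions read by word_creator()
#   contents/figures/fileoutpart*.png  - figure renditions placed by word_creator()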
def json_parser(filename):
    """Parse structuredData.json into a per-page list of element descriptions."""
    # Opening JSON file; json.load returns the JSON object as a dictionary
    with open(filename, encoding="utf8") as f:
        data = json.load(f)
    print(data['extended_metadata']['page_count'])
    print(data['extended_metadata']['language'])
    all_pages_data = []
    curr_page_contents = []
    current_page = 0
    # Iterating through the JSON element list. Path looks like "//Document/P", so the
    # character at index 11 (just after "//Document/") identifies the element kind.
    for element in data['elements']:
        # Detection of headings and paragraphs
        if element['Path'][11] == "H" or element['Path'][11] == "P":
            if current_page != element["Page"]:
                all_pages_data.append(curr_page_contents)
                current_page = element["Page"]
                curr_page_contents = []
            current_element = ["Text", element["Text"], element["TextSize"], element["Font"]["family_name"],
                               element["Font"]["italic"], element["Font"]["weight"]]
            try:
                current_element.append(element["attributes"]["SpaceAfter"])
            except KeyError:
                current_element.append("")
            try:
                current_element.append(element["attributes"]["TextAlign"])
            except KeyError:
                current_element.append("")
            curr_page_contents.append(current_element)
        # Detection of a list between paragraphs
        elif element['Path'][11] == "L":
            if current_page != element["Page"]:
                all_pages_data.append(curr_page_contents)
                current_page = element["Page"]
                curr_page_contents = []
            differ_creator = element["Path"].split("/")
            if differ_creator[-1] == "Lbl":
                current_element = ["List Numbering", element["Text"], element["TextSize"],
                                   element["Font"]["family_name"],
                                   element["Font"]["italic"], element["Font"]["weight"]]
            else:
                current_element = ["List Data", element["Text"], element["TextSize"],
                                   element["Font"]["family_name"],
                                   element["Font"]["italic"], element["Font"]["weight"]]
            curr_page_contents.append(current_element)
        # Detection of figures
        elif element['Path'][11] == "F":
            if current_page != element["Page"]:
                all_pages_data.append(curr_page_contents)
                current_page = element["Page"]
                curr_page_contents = []
            current_element = ["Figure", element["filePaths"][0], element["attributes"]["Placement"],
                               element["attributes"]["BBox"][0], element["attributes"]["BBox"][1],
                               element["attributes"]["BBox"][2], element["attributes"]["BBox"][3]]
            curr_page_contents.append(current_element)
        # Detection of tables
        elif element['Path'][11] == "S":
            if current_page != element["Page"]:
                all_pages_data.append(curr_page_contents)
                current_page = element["Page"]
                curr_page_contents = []
            if element['Path'][11:21] == "Sect/Table":
                curr_page_contents.append(["Table", element["attributes"]["NumRow"],
                                           element["attributes"]["NumCol"], element["filePaths"][0]])
    all_pages_data.append(curr_page_contents)
    return all_pages_data
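# Shape of the per-element lists returned above (this is what word_creator() relies on):
#   ["Text", text, size, font_family, italic, weight, space_after, text_align]
#   ["List Numbering" | "List Data", text, size, font_family, italic, weight]
#   ["Figure", file_path, placement, bbox_x0, bbox_y0, bbox_x1, bbox_y1]
#   ["Table", num_rows, num_cols, file_path]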
def word_creator(all_data):
    """Rebuild a .docx from the per-page element lists produced by json_parser()."""
    listo = ""
    doc = Document()
    for page in all_data:
        for ele in page:
            # Writing text in docx
            if ele[0] == "Text":
                act = doc.add_paragraph()
                act_format = act.paragraph_format
                if ele[6] == "":
                    act_format.space_after = Pt(12)
                else:
                    act_format.space_after = Pt(int(ele[6]))
                if ele[7] == "Start":
                    act_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
                elif ele[7] == "Center":
                    act_format.alignment = WD_ALIGN_PARAGRAPH.CENTER
                elif ele[7] == "End":
                    act_format.alignment = WD_ALIGN_PARAGRAPH.RIGHT
                else:
                    # "Justify", missing or unknown alignment all fall back to justified text
                    act_format.alignment = WD_ALIGN_PARAGRAPH.JUSTIFY
                act_format.line_spacing = Pt(12)
                act_format.left_indent = Inches(0)
                # if (non_dial_dest_lang == 'hi') or (non_dial_dest_lang == 'gu'):
                #     act.style.font.name = 'Mangal'
                # else:
                #     act.style.font.name = 'Courier New'
                para = act.add_run(ele[1])
                # Apply the extracted font and size to this run rather than mutating the
                # shared 'Normal' style, which would restyle every paragraph in the document.
                para.font.name = str(ele[3])
                para.font.size = Pt(int(ele[2]))
                if ele[4]:  # "italic" is a JSON boolean
                    para.italic = True
                if ele[5] > 400:
                    para.bold = True
            # Adding a table in docx
            elif ele[0] == "Table":
                # Read the table rendition (assumes zip_extractor() unpacked it into "contents")
                hr_df = pd.read_excel(os.path.join("contents", str(ele[3])))
                add_table_to_doc(doc, hr_df)
            # Remember whether the upcoming list is ordered or unordered
            elif ele[0] == "List Numbering":
                if list(ele[1])[0].isdigit():
                    listo = "Ordered"
                else:
                    listo = "UnOrdered"
            # Adding list items in docx
            elif ele[0] == "List Data":
                if listo == "Ordered":
                    para = doc.add_paragraph(ele[1], style='List Number')
                else:
                    para = doc.add_paragraph(ele[1], style='List Bullet')
                listo = ""
                for run in para.runs:
                    if ele[4]:  # "italic" is a JSON boolean
                        run.italic = True
                    if ele[5] > 300:
                        run.bold = True
            # Adding a figure in docx
            elif ele[0] == "Figure":
                # Figure rendition path relative to zip_extractor()'s "contents" directory;
                # width and height come from the element's bounding box (in points).
                doc.add_picture(os.path.join("contents", str(ele[1])),
                                width=Pt(int(ele[5]) - int(ele[3])),
                                height=Pt(int(ele[6]) - int(ele[4])))
                last_paragraph = doc.paragraphs[-1]
                last_paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
        # Start a new page in the docx after each PDF page
        doc.add_page_break()
    # Save to file
    doc.save("hr_data11111111.docx")
def convert_books(inputfile):
    """Entry point: run the Extract API on a PDF and save the result zip next to the input file."""
    outputzipname = str(inputfile).split(".")[0] + ".zip"
    # ext = str(inputfile).split(".")
    # if ext[-1] == "pdf":
    #     pass
    #
    # def convert_to_pdf(input_docx, out_folder):
    #     p = subprocess.Popen(
    #         [
    #             "libreoffice",
    #             "--headless",
    #             "--convert-to",
    #             "pdf",
    #             "--outdir",
    #             out_folder,
    #             input_docx,
    #         ]
    #     )
    #     print(["--convert-to", "pdf", input_docx])
    #     p.communicate()
    print(outputzipname)
    pdf_text_images_extractor(api_creds, inputfile, outputzipname)
    # zip_extractor(outputzipname)
    return 1
# input2 = "C:\\Users\\ANSU\\Downloads\\testtt12.pdf"
# inputfile = "C:\\Users\\ANSU\\Desktop\\MNF\\convertBook\\Adobe\\adobe-dc-pdf-services-sdk-extract-python-samples\\resources\\ihuuh_tnew.pdf"
# outputzipname = "someoutput2.zip"
# json_file = "C:\\Users\\ANSU\\Desktop\\MNF\\convertBook\\contents\\structuredData.json"
#convert_books("/home/user/mnf/project/MNF/conversion/booktranslator/ihuuh_tnew.pdf")
#pdf_text_images_extractor(api_creds,"/home/user/mnf/project/MNF/conversion/booktranslator/ihuuh_tnew.pdf","output.zip")
#zip_extractor(outputzipname)
# all_pages_data = json_parser(json_file)
# #print(all_pages_data)
# word_creator(all_pages_data)
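# A minimal end-to-end sketch of the pipeline (the sample PDF path is hypothetical;
# the zip and "contents" locations follow the functions above):
if __name__ == "__main__":
    sample_pdf = "/tmp/sample_book.pdf"  # hypothetical input PDF
    convert_books(sample_pdf)  # calls the Extract API and saves /tmp/sample_book.zip
    zip_extractor("/tmp/sample_book.zip")  # unpack structuredData.json and renditions into ./contents
    pages = json_parser("contents/structuredData.json")
    word_creator(pages)  # writes the reconstructed .docx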