How do I deploy an Azure Function, written in Python, that converts HTML files to PDF using wkhtmltopdf?

Question

I am trying to deploy an Azure Function that uses a locally installed copy of wkhtmltopdf to convert HTML files to PDF. It has been deployed successfully. It is a blob trigger, so whenever an HTML file is uploaded to the blob container, the function converts that HTML file to a PDF.

The code is:

import logging
import os
import pdfkit
from azure.storage.blob import BlobServiceClient
import azure.functions as func

# Configure logging: request DEBUG-level output on the root logger.
# NOTE(review): basicConfig() does nothing if the root logger already has
# handlers, so this may have no effect under a host that pre-configures
# logging - confirm in the deployed environment.
logging.basicConfig(level=logging.DEBUG)

# Initialize BlobServiceClient once at import time; the functions below share it.
# The connection string is read from the 'AzureWebJobsStorage' environment
# variable; os.getenv returns None when that variable is not set.
connection_string = os.getenv('AzureWebJobsStorage')
blob_service_client = BlobServiceClient.from_connection_string(connection_string)

# Function to convert HTML to PDF and upload the result
def convert_html_to_pdf_and_upload(input_blob_name, html_content, output_container_name):
    """Render HTML to PDF with wkhtmltopdf and upload the PDF as a blob.

    The wkhtmltopdf executable is looked up first in the ``bin`` folder that
    sits one level above this file's directory, using the executable name
    that matches the current operating system, and then on ``PATH``.

    Args:
        input_blob_name: Name of the source blob. It is expected to start
            with ``testinput/`` (``main`` checks this before calling); that
            many leading characters are removed to build the output name.
        html_content: The HTML markup to render, as text.
        output_container_name: Container that receives the generated PDF.
            The output blob keeps the input's relative path with its
            extension replaced by ``.pdf``; an existing blob is overwritten.

    Returns:
        None. Any failure is logged at ERROR level and not re-raised.
    """
    import shutil  # function-scope import: only this lookup needs it

    try:
        # The original code always looked for "wkhtmltopdf.exe". That file is
        # a Windows binary; Azure Functions hosts Python apps on Linux, where
        # it can never be executed, so pick the name that fits the platform.
        bin_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), 'bin')
        executable_name = 'wkhtmltopdf.exe' if os.name == 'nt' else 'wkhtmltopdf'
        path_to_wkhtmltopdf = os.path.join(bin_dir, executable_name)

        if not os.path.isfile(path_to_wkhtmltopdf):
            # No bundled binary: fall back to one installed on the host
            # (for example via a system package or a custom container image).
            bundled_path = path_to_wkhtmltopdf
            path_to_wkhtmltopdf = shutil.which('wkhtmltopdf')
            if path_to_wkhtmltopdf is None:
                raise FileNotFoundError(
                    f"No wkhtmltopdf executable found at {bundled_path} or on PATH"
                )

        config = pdfkit.configuration(wkhtmltopdf=path_to_wkhtmltopdf)

        # Options to handle potential issues: JavaScript is disabled and
        # page/media load errors are ignored rather than aborting the render.
        options = {
            'no-stop-slow-scripts': None,
            'disable-javascript': None,
            'load-error-handling': 'ignore',
            'load-media-error-handling': 'ignore',
        }

        # Passing False as the output path makes pdfkit return the PDF
        # content instead of writing it to a file.
        pdf_content = pdfkit.from_string(html_content, False, configuration=config, options=options)

        # Generate the output blob name: drop the leading "testinput/" and
        # swap the extension for ".pdf".
        relative_path = input_blob_name[len("testinput/"):]
        base_name = os.path.splitext(relative_path)[0]
        output_blob_name = f"{base_name}.pdf"

        # Upload the PDF content, replacing any blob with the same name.
        output_blob_client = blob_service_client.get_blob_client(
            container=output_container_name, blob=output_blob_name
        )
        output_blob_client.upload_blob(pdf_content, overwrite=True)

        logging.info(f"PDF uploaded to: {output_container_name}/{output_blob_name}")

    except Exception as e:
        # Best-effort by design: record the failure and return normally.
        logging.error(f"Failed to convert HTML to PDF for blob {input_blob_name}: {str(e)}")

# Function to copy a file to another container
def copy_blob_to_preprocess(input_blob_name, output_container_name):
    """Copy a blob from the 'testinput' container into another container.

    The blob is downloaded in full and uploaded again under the same relative
    path, with its extension replaced by ``.pdf``. An existing destination
    blob is overwritten.

    Args:
        input_blob_name: Source blob name; the first ``len("testinput/")``
            characters are dropped to obtain the path inside 'testinput'.
        output_container_name: Name of the destination container.

    Returns:
        None. Any failure is logged at ERROR level and not re-raised.
    """
    try:
        # Path of the blob inside the source container.
        relative_path = input_blob_name[len("testinput/"):]

        # Read the whole source blob into memory.
        source_client = blob_service_client.get_blob_client(container='testinput', blob=relative_path)
        blob_content = source_client.download_blob().readall()

        # Destination name: same path, extension forced to ".pdf".
        base_name, _ = os.path.splitext(relative_path)
        output_blob_name = f"{base_name}.pdf"

        # Write the content to the destination, replacing any existing blob.
        blob_service_client.get_blob_client(
            container=output_container_name,
            blob=output_blob_name,
        ).upload_blob(blob_content, overwrite=True)

        logging.info(f"Blob copied to: {output_container_name}/{output_blob_name}")

    except Exception as e:
        logging.error(f"Failed to copy blob {input_blob_name}: {str(e)}")

def main(myblob: func.InputStream):
    """Entry point: route an incoming blob according to its name.

    Blobs whose name starts with ``testinput/`` are handled by extension
    (case-insensitive): ``.html``/``.htm`` content is decoded as UTF-8 and
    converted to PDF, ``.pdf`` blobs are copied; both go to the
    'testpreprocess' container. Everything else is skipped with an INFO log.

    Args:
        myblob: The input stream for the blob being processed; its ``name``
            and ``read()`` are used.

    Returns:
        None. Any exception raised while routing is logged at ERROR level
        and not re-raised.
    """
    input_blob_name = myblob.name
    logging.info(f"Processing blob: {input_blob_name}")

    destination_container = 'testpreprocess'

    try:
        # Guard: only names under the 'testinput/' prefix are processed.
        if not input_blob_name.startswith('testinput/'):
            logging.info(f"Skipping file not in 'testinput' container: {input_blob_name}")
            return

        # Lower-case once so the extension checks are case-insensitive.
        lowered_name = input_blob_name.lower()

        if lowered_name.endswith(('.html', '.htm')):
            # HTML: read the blob as UTF-8 text, then convert and upload.
            html_content = myblob.read().decode('utf-8')
            convert_html_to_pdf_and_upload(input_blob_name, html_content, destination_container)
        elif lowered_name.endswith('.pdf'):
            # PDF: nothing to convert, copy it across.
            copy_blob_to_preprocess(input_blob_name, destination_container)
        else:
            logging.info(f"Skipping non-HTML, non-PDF file: {input_blob_name}")

    except Exception as e:
        logging.error(f"Failed to process blob {input_blob_name}: {str(e)}")

Please give me the necessary step-by-step procedure to deploy the code.

The project file structure is:

User's image

So, can you provide the necessary steps to deploy the code, with a full explanation?

Thank you

Share via

How do I deploy an Azure Function, written in Python, that converts HTML files to PDF using wkhtmltopdf?

Your answer