Model id change

Shambhu Rai 1,411 Reputation points
2022-05-10T19:38:42.04+00:00

Hi Expert,

I am trying to use a model ID in my Form Recognizer Python script. Currently I am using the endpoint URL and API key, and I want to export the data. How can I use the model ID? Here is my code:

from django.shortcuts import render
import os
from django.http import HttpResponse
import csv
import re
from azure.core.credentials import AzureKeyCredential
from azure.ai.formrecognizer import FormRecognizerClient
from azure.storage.blob import BlobClient


# Create your views here.

def download_blob(blob_name, output_path):
    """
    Download a blob from the ``demo`` container to a local file.

    The storage connection string is read from the
    ``AZURE_STORAGE_CONNECTION_STRING`` environment variable so that the
    account key never has to live in source control.

    :param blob_name: name (path) of the blob inside the container
    :param output_path: local directory to write into ('' means the
        current working directory)
    :return: path of the local file the blob was written to
    :raises RuntimeError: if the connection string is not configured
    """
    # SECURITY: credentials must come from configuration, not from code.
    # Any account key that was ever committed should be rotated.
    conn_str = os.environ.get('AZURE_STORAGE_CONNECTION_STRING')
    if not conn_str:
        raise RuntimeError(
            'AZURE_STORAGE_CONNECTION_STRING environment variable is not set'
        )

    # Only the last path component of the blob name is used locally, so
    # "folder/Test.pdf" is saved as "<output_path>/Test.pdf".
    _, filename = os.path.split(blob_name)
    destination_file = os.path.join(output_path, filename)

    blob_client = BlobClient.from_connection_string(
        conn_str=conn_str,
        container_name='demo',
        blob_name=blob_name,
    )
    with open(destination_file, "wb") as my_blob:
        blob_data = blob_client.download_blob()
        blob_data.readinto(my_blob)

    return destination_file


def recognize_form_tables(form_path):
    """
    Run Form Recognizer content recognition on a local document.

    The file is read fully into memory and then deleted from disk before
    the bytes are submitted to the service.

    :param form_path: path of the local document; the file is removed
        after it has been read
    :return: tuple ``(tables, table_label_data)`` where ``tables`` is the
        flat list of tables from every page and ``table_label_data`` is a
        list of ``[col1, area_name, month_reporting]`` string triples
        captured from header text on the pages
    """
    # The endpoint must be a string literal; an unquoted URL is a syntax error.
    endpoint = "https://Test1.cognitiveservices.azure.com/"
    credential = AzureKeyCredential("<key>")
    form_recognizer_client = FormRecognizerClient(endpoint, credential)

    with open(form_path, "rb") as fd:
        form = fd.read()

    os.remove(form_path)

    response = form_recognizer_client.begin_recognize_content(form)
    form_pages = response.result()

    # Raw strings keep "\s" a regex escape instead of an invalid string
    # escape; compiling once keeps the work out of the per-line loop.
    port_pattern = re.compile(r'^col1:(.*)')
    header_pattern = re.compile(
        r'.*col1:(.*)Area Name:(.*)Month Reporting:\s*([A-Za-z]{3}-[0-9]{2}).*'
    )

    tables = []
    table_label_data = []

    for content in form_pages:
        tables.extend(content.tables)

        table_header = ''
        i = 0  # number of lines seen since the last "col1:" line (or page start)
        collecting = False
        for line in content.lines:
            # A line starting with "col1:" opens a new header window.
            if port_pattern.search(line.text):
                i = 0
                collecting = True

            # Gather the first ten lines of the window into one string.
            if collecting and i < 10:
                table_header += line.text + ' '

            # After ten lines, try to parse the three labels from the
            # gathered text, then reset for the next window.
            if i == 10:
                header_match = header_pattern.match(table_header)
                if header_match:
                    table_label_data.append(list(header_match.groups()))
                table_header = ''
                collecting = False

            i += 1

    return tables, table_label_data


def create_csv(table, path):
    """
    Append the usable rows of ``table`` to the CSV file at ``path``.

    Rows with fewer than ten columns, or with an empty fourth column, are
    skipped. The file is created if it does not exist.

    :param table: iterable of rows, each a sequence of cell values
    :param path: path of the CSV file to append to
    """
    # newline='' is required by the csv module; without it every row is
    # followed by an extra blank line on platforms that translate "\n".
    with open(path, 'a', newline='') as f:
        writer = csv.writer(f)
        for row in table:
            if len(row) < 10 or not row[3]:
                continue
            writer.writerow(row)


def create_csv_data(tables, table_label_data):
    """
    Rebuild each recognized table as a list of rows and append it to
    ``table.csv`` via ``create_csv``.

    Cells are consumed in the order ``t.cells`` yields them and grouped
    into rows by their ``row_index``. The first table keeps its header
    row; header cells of every later table are dropped.

    :param tables: tables whose ``cells`` expose ``to_dict()`` with at
        least ``row_index`` and ``text`` keys (``is_header`` is optional)
    :param table_label_data: list of label lists, used to prefix data rows
    :return: None
    """
    count = 0  # 1-based number of the table being processed
    for t in tables:
        count += 1
        table_data = []
        row_index = -1  # index into table_data of the row being filled
        for cell in t.cells:
            cell = cell.to_dict()

            # Only the first table contributes a header row.
            if count > 1 and 'is_header' in cell and cell['is_header']:
                continue
            # Same row as the previous cell. For later tables the cell's
            # row_index is allowed to be one ahead of ours -- presumably to
            # compensate for the skipped header row; verify.
            # NOTE(review): if a later table's first kept cell has
            # row_index 0, this branch runs while table_data is still empty
            # and table_data[-1] raises IndexError -- confirm whether every
            # later table is guaranteed to start with header cells.
            elif cell['row_index'] == row_index or (count > 1 and cell['row_index'] == row_index + 1):
                table_data[row_index].append(cell['text'])
            else:
                # First cell of a new row.
                row_index += 1
                if 'is_header' in cell and cell['is_header']:
                    # Header row: start with the names of the label columns.
                    table_data.append(['Port', 'Area Name', 'Month Reporting'])
                else:
                    table_data.append([])
                    # Data row: start with this table's labels.
                    # NOTE(review): the strict ">" means the last entry of
                    # table_label_data is never used (index count - 1 only
                    # needs len >= count) -- confirm whether ">=" was meant.
                    if len(table_label_data) > count:
                        table_data[row_index] = table_label_data[count - 1] + table_data[row_index]
                table_data[row_index].append(cell['text'])

        # Appends this table's rows to the shared output file.
        create_csv(table_data, f'table.csv')
    print('Created or updated table.csv file.')


def index(request):
    """
    View: download ``Test.pdf``, extract its tables to ``table.csv`` and
    write a copy without empty rows to ``Test.csv``.

    :param request: incoming HTTP request (not inspected)
    :return: ``HttpResponse`` with a fixed success message
    """
    local_pdf = download_blob('Test.pdf', '')
    tables, table_label_data = recognize_form_tables(local_pdf)
    print('form recognize success')
    create_csv_data(tables, table_label_data)

    # Copy table.csv to Test.csv, keeping only rows that contain fields.
    with open('table.csv', newline='') as src, \
            open('Test.csv', 'w', newline='') as dst:
        non_empty_rows = (record for record in csv.reader(src) if record)
        csv.writer(dst).writerows(non_empty_rows)

    return HttpResponse("Load Succeeded")

 
Azure AI Document Intelligence
Azure AI Document Intelligence
An Azure service that turns documents into usable data. Previously known as Azure Form Recognizer.
1,532 questions
{count} votes