Hello,
I have trained a Custom Neural Model under Document Intelligence API Version 2023-07-31.
When I run inference through the API, the "apiVersion" field in the response does not match the API version the model was trained with: the response reports API version 2024-02-29-preview. The problem occurs with both custom neural models and prebuilt-layout, and it persists even when the API version is specified directly in the HTTP request.
Can anyone reproduce this behavior, and could Microsoft please advise on or fix this issue?
Test Case 1: Custom Neural Model
Model:

Code:
import os
import yaml
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
def load_azure_config(directory="."):
    """Load the Azure settings stored in ``azure_config.yaml``.

    Args:
        directory (str): Folder that contains ``azure_config.yaml``.
            Defaults to the current working directory.

    Returns:
        The parsed YAML document, exactly as produced by ``yaml.safe_load``.

    Raises:
        OSError: If the configuration file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    filepath = os.path.join(directory, "azure_config.yaml")
    with open(filepath, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # The parse error used to be printed and then execution fell
            # through to a `return` of a variable that was never assigned,
            # hiding the real problem behind an UnboundLocalError.
            # Report it and let the original error propagate instead.
            print(exc)
            raise
def get_doc_client(AzureKeys):
    """Create a DocumentIntelligenceClient from the loaded configuration.

    Args:
        AzureKeys (dict): Mapping with a "DocumentIntelligence" entry that
            holds the "KEY_1" and "AZURE_ENDPOINT" values.

    Returns:
        DocumentIntelligenceClient: Client authenticated with the API key.
    """
    settings = AzureKeys["DocumentIntelligence"]
    api_key, service_url = settings["KEY_1"], settings["AZURE_ENDPOINT"]
    # NOTE(review): no api_version is passed here, so the client library
    # picks whatever service version it defaults to - confirm against the
    # SDK documentation when a specific API version is required.
    return DocumentIntelligenceClient(service_url, AzureKeyCredential(api_key))
def analyze_document(filepath, doc_client, model_id):
    """Send a local file to a Document Intelligence model and wait for the result.

    Args:
        filepath (str): Path of the document to analyze; read as raw bytes.
        doc_client: Client exposing ``begin_analyze_document``.
        model_id (str): ID of the model that should analyze the document.

    Returns:
        Whatever the poller's ``result()`` call yields for the operation.
    """
    request_options = {
        "model_id": model_id,
        "content_type": "application/octet-stream",
    }
    with open(filepath, "rb") as document_stream:
        operation = doc_client.begin_analyze_document(
            analyze_request=document_stream,
            **request_options,
        )
        # The stream stays open until the long-running operation completes.
        return operation.result()
# Script entry point: run one document through the custom model and dump the response.
if __name__ == "__main__":
    # Key and endpoint are stored under the 'AzureKeys' entry of the YAML config.
    AzureKeys = load_azure_config()['AzureKeys']
    doc_client = get_doc_client(AzureKeys)

    # Local PDF to analyze and the ID of the trained model.
    input_file = r"C:\Users\xxxxxx\Downloads\AIAI2021 (1).pdf"
    model_id = "test-response"

    # Run the analysis and show the complete response as a dictionary
    # (replace the print with real processing as needed).
    result = analyze_document(
        filepath=input_file,
        doc_client=doc_client,
        model_id=model_id,
    )
    print(result.as_dict())
Response:
{'apiVersion': '2024-02-29-preview', 'modelId': 'test-response', 'stringIndexType': 'textElements', 'content': 'Goldsmiths Research Online\nCitation\nGoldsmiths Research Online (GRO) is the institutional research repository for Goldsmiths, University of London\nOlaniyan, Rapheal; Stamate, Daniel and Pu, Ida. 2021. \'A Two-Step Optimised BERT-Based NLP Algorithm for Extracting Sentiment from Financial News\' ... }
Test Case 2: Prebuilt Layout Model
Model:
prebuilt-layout, API version 2023-07-31 (the version requested in the code below)
Code:
import os
import yaml
import requests
import time
def load_azure_config(directory="."):
    """Load the Azure configuration from ``azure_config.yaml``.

    Args:
        directory (str): Folder that contains ``azure_config.yaml``.
            Defaults to the current working directory.

    Returns:
        The parsed YAML document, exactly as produced by ``yaml.safe_load``.

    Raises:
        OSError: If the configuration file cannot be opened.
        yaml.YAMLError: If the file is not valid YAML.
    """
    filepath = os.path.join(directory, "azure_config.yaml")
    with open(filepath, "r") as stream:
        try:
            return yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            # Printing and then returning a never-assigned variable turned
            # every parse failure into an UnboundLocalError; report the
            # problem and re-raise the real error instead.
            print(exc)
            raise
def analyze_document_rest(filepath, azure_keys, model_id, api_version="2023-07-31",
                          poll_interval=5, request_timeout=60):
    """Submit a document to the Document Intelligence REST API and poll for the result.

    The file is POSTed as ``application/octet-stream`` to the model's
    ``:analyze`` route with ``api_version`` sent as the ``api-version`` query
    parameter. The URL from the ``Operation-Location`` response header is then
    polled until the reported status is ``succeeded`` or ``failed``.

    Args:
        filepath (str): Path to the document file to analyze.
        azure_keys (dict): Mapping with a "DocumentIntelligence" entry holding
            "AZURE_ENDPOINT" and "KEY_1".
        model_id (str): Document model ID (e.g., 'prebuilt-layout').
        api_version (str): API version to request.
        poll_interval (float): Seconds to sleep between polling requests.
        request_timeout (float): Timeout in seconds handed to each HTTP call,
            so a stalled connection cannot block forever.

    Returns:
        dict: The JSON body of the final polling response when the analysis
        succeeded, or None if any step failed (details are printed).

    Raises:
        KeyError: If ``azure_keys`` lacks the expected entries.
    """
    endpoint = azure_keys["DocumentIntelligence"]["AZURE_ENDPOINT"]
    key = azure_keys["DocumentIntelligence"]["KEY_1"]

    # An endpoint written with a trailing slash would otherwise yield
    # "...//formrecognizer/..." in the request URL.
    base_url = endpoint.rstrip("/")
    url = f"{base_url}/formrecognizer/documentModels/{model_id}:analyze?api-version={api_version}"
    headers = {
        "Ocp-Apim-Subscription-Key": key,
        "Content-Type": "application/octet-stream",
    }

    try:
        # Stream the file contents as the request body.
        with open(filepath, "rb") as file_data:
            response = requests.post(
                url, headers=headers, data=file_data, timeout=request_timeout
            )

        # 202 Accepted is the only status that starts an analysis.
        if response.status_code == 404:
            print(
                "Error 404: Endpoint or resource not found. Check your endpoint, model ID, or API version.")
            return None
        if response.status_code != 202:
            print(f"Error: {response.status_code}")
            try:
                print(response.json())  # Print error details if available
            except ValueError:
                # The body was not JSON; show it verbatim instead.
                print(response.text)
            return None

        # The service reports where to poll through this response header.
        operation_location = response.headers.get("Operation-Location")
        if not operation_location:
            print("Error: Operation-Location header not found.")
            return None
        print("Document analysis submitted. Polling for results...")

        poll_headers = {"Ocp-Apim-Subscription-Key": key}
        while True:
            poll_response = requests.get(
                operation_location, headers=poll_headers, timeout=request_timeout
            )
            if poll_response.status_code != 200:
                print(f"Polling Error: {poll_response.status_code}")
                print(poll_response.text)
                return None

            poll_data = poll_response.json()
            status = poll_data.get("status")
            if status == "succeeded":
                print("Analysis succeeded.")
                return poll_data
            if status == "failed":
                print("Analysis failed.")
                print(poll_data)
                return None

            # Any other status means the operation is still in progress.
            print(f"Waiting for {poll_interval} seconds before retrying...")
            time.sleep(poll_interval)
    except Exception as e:
        # Boundary handler: the documented contract is "None on any error",
        # so I/O, network and decoding failures are reported, not raised.
        print(f"An error occurred: {e}")
        return None
# Key and endpoint are stored under the "AzureKeys" entry of the YAML config.
azure_keys = load_azure_config()["AzureKeys"]

# Document to submit, the model to run it through, and the API version to request.
input_file = r"C:\Users\xxxxxxx\Downloads\AIAI2021 (1).pdf"
model_id = "prebuilt-layout"
api_version = "2023-07-31"

# Submit via the REST helper; `result` is the final response dict, or None on failure.
result = analyze_document_rest(
    input_file,
    azure_keys,
    model_id,
    api_version=api_version,
)
Response:
{'apiVersion': '2024-02-29-preview',
'modelId': 'prebuilt-layout',
'stringIndexType': 'textElements',
'content': 'Goldsmiths Research Online\nGoldsmiths '...}