Hello Fred,
I attached a code sample that works for the given case (at least for me :D):
from azure.core.credentials import AzureKeyCredential
from azure.ai.documentintelligence import DocumentIntelligenceClient
from azure.ai.documentintelligence.models import AnalyzeOutputOption
import os
def to_searchable_pdf(src_path: str, dst_path: str, endpoint: str, api_key: str) -> str:
    """Run ``prebuilt-read`` on a local file and save the searchable PDF.

    The file at ``src_path`` is submitted for analysis with PDF output
    requested; once the operation completes, the generated PDF is
    downloaded and written to ``dst_path``.

    Args:
        src_path: Path of the input document, opened in binary mode.
        dst_path: Path the downloaded PDF is written to (binary mode).
        endpoint: Endpoint passed to ``DocumentIntelligenceClient``.
        api_key: Key wrapped in an ``AzureKeyCredential`` for the client.

    Returns:
        ``dst_path``, unchanged.
    """
    credential = AzureKeyCredential(api_key)
    di_client = DocumentIntelligenceClient(endpoint=endpoint, credential=credential)

    # Step 1: start the analysis, asking the service to also produce a PDF.
    with open(src_path, "rb") as src_file:
        analysis = di_client.begin_analyze_document(
            model_id="prebuilt-read",
            body=src_file,  # the open file is the request body
            output=[AnalyzeOutputOption.PDF],  # searchable PDF requested here
        )

    # Step 2: block until the operation finishes. The value returned is the
    # analysis result, not the PDF bytes -- those are fetched separately.
    analysis_result = analysis.result()

    # Step 3: the poller's details carry the ID needed to fetch the PDF.
    operation_id = analysis.details["operation_id"]

    # Step 4: download the PDF content and persist it to disk.
    pdf_chunks = di_client.get_analyze_result_pdf(
        model_id=analysis_result.model_id,
        result_id=operation_id,
    )
    with open(dst_path, "wb") as pdf_file:
        pdf_file.writelines(pdf_chunks)

    return dst_path
if __name__ == "__main__":
    print("Hello :)")  # quick sanity print so it is obvious the script started

    # Example run -- swap in the values for your own resource and files.
    service_endpoint = "https://<your>.cognitiveservices.azure.com/"
    service_key = "<your_api_key>"
    scanned_pdf = "scan.pdf"
    searchable_pdf = "scan.searchable.pdf"

    saved_to = to_searchable_pdf(
        src_path=scanned_pdf,
        dst_path=searchable_pdf,
        endpoint=service_endpoint,
        api_key=service_key,
    )
    print(f"Searchable PDF saved at: {saved_to}")
Hope that helps a bit :).
Best regards,
Moritz