InvalidDocumentAccessLevel Cannot access source document location with the current permissions.

Question

Hi,

I am getting this error for the following code I have. Can my sourceuri be my local computer directory as mentioned or is supposed to be blob with uri?
This is my code:
import os

import fitz # PyMuPDF for PDF handling

from docx import Document

from langdetect import detect

from azure.core.credentials import AzureKeyCredential

from azure.ai.translation.document import DocumentTranslationClient

import hashlib

from glob import glob

import pytesseract

from PIL import Image

import logging

Setup logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

Initialize Azure Document Translation client

endpoint = "https://xxxx.cognitiveservices.azure.com/"

credential = AzureKeyCredential("xxxx")

client = DocumentTranslationClient(endpoint, credential)

output_dir = "/xxx/xxx/xxx/xxx/xxx/"

os.makedirs(output_dir, exist_ok=True)

def detect_language(text):

try:

    return detect(text)

except:

    return "error"

def ocr_pdf_page(page):

pix = page.get_pixmap()

img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

text = pytesseract.image_to_string(img)

return text

def process_pdf(file_path):

doc = fitz.open(file_path)

text = ''

for page in doc:

    page_text = page.get_text().strip()

    if not page_text:  # If no text is extracted, use OCR

        page_text = ocr_pdf_page(page)

    text += page_text + "
"

return text

def process_docx(file_path):

doc = Document(file_path)

text = ''

for para in doc.paragraphs:

    text += para.text + '
'

return text

def translate_and_save_file(file_path, target_language="en"):

try:

    base_name = os.path.basename(file_path)

    name_without_ext, _ = os.path.splitext(base_name)

    translated_name = name_without_ext[:200]

    name_hash = hashlib.md5(translated_name.encode()).hexdigest()[:8]

    new_file_name = f"{translated_name}_{name_hash}_translated.txt"

    new_file_path = os.path.join(output_dir, new_file_name)

    # Pass 'source_url', 'target_url', and 'target_language' for a single input

    poller = client.begin_translation(

        source_url=file_path,

        target_url=new_file_path,

        target_language=target_language

    )

    result = poller.result()

    logging.info(f"Translated and saved: {new_file_path}")

except Exception as e:

    logging.error(f"Failed to translate and save {file_path}: {e}")

Iterate over PDFs and DOCXs

for file_path in glob(os.path.join(output_dir, '.pdf')) + glob(os.path.join(output_dir, '.docx')):

logging.info(f"Processing file: {file_path}")

lang = detect_language(process_pdf(file_path) if file_path.endswith('.pdf') else process_docx(file_path))

if lang.startswith('zh'):

    translate_and_save_file(file_path)

this is the error:

2024-03-17 00:51:32,444 - ERROR - Failed to translate and save /xxx/xxx/xxx/xxx/xxx/xxxxxx.pdf: (InvalidDocumentAccessLevel): Cannot access source document location with the current permissions.

Share via

InvalidDocumentAccessLevel Cannot access source document location with the current permissions.

Setup logging

Initialize Azure Document Translation client

Iterate over PDFs and DOCXs

Your answer