InvalidDocumentAccessLevel Cannot access source document location with the current permissions.

Yusuf Demir 5 Reputation points
2024-03-17T05:01:28.2066667+00:00

Hi,

I am getting this error for the following code I have. Can my sourceuri be my local computer directory as mentioned or is supposed to be blob with uri?
This is my code:
import os

import fitz # PyMuPDF for PDF handling

from docx import Document

from langdetect import detect

from azure.core.credentials import AzureKeyCredential

from azure.ai.translation.document import DocumentTranslationClient

import hashlib

from glob import glob

import pytesseract

from PIL import Image

import logging

Setup logging

logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

Initialize Azure Document Translation client

endpoint = "https://xxxx.cognitiveservices.azure.com/"

credential = AzureKeyCredential("xxxx")

client = DocumentTranslationClient(endpoint, credential)

output_dir = "/xxx/xxx/xxx/xxx/xxx/"

os.makedirs(output_dir, exist_ok=True)

def detect_language(text):

try:

    return detect(text)

except:

    return "error"

def ocr_pdf_page(page):

pix = page.get_pixmap()

img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

text = pytesseract.image_to_string(img)

return text

def process_pdf(file_path):

doc = fitz.open(file_path)

text = ''

for page in doc:

    page_text = page.get_text().strip()

    if not page_text:  # If no text is extracted, use OCR

        page_text = ocr_pdf_page(page)

    text += page_text + "\n"

return text

def process_docx(file_path):

doc = Document(file_path)

text = ''

for para in doc.paragraphs:

    text += para.text + '\n'

return text

def translate_and_save_file(file_path, target_language="en"):

try:

    base_name = os.path.basename(file_path)

    name_without_ext, _ = os.path.splitext(base_name)

    translated_name = name_without_ext[:200]

    name_hash = hashlib.md5(translated_name.encode()).hexdigest()[:8]

    new_file_name = f"{translated_name}_{name_hash}_translated.txt"

    new_file_path = os.path.join(output_dir, new_file_name)

    # Pass 'source_url', 'target_url', and 'target_language' for a single input

    poller = client.begin_translation(

        source_url=file_path,

        target_url=new_file_path,

        target_language=target_language

    )

    result = poller.result()

    logging.info(f"Translated and saved: {new_file_path}")

except Exception as e:

    logging.error(f"Failed to translate and save {file_path}: {e}")

Iterate over PDFs and DOCXs

for file_path in glob(os.path.join(output_dir, '.pdf')) + glob(os.path.join(output_dir, '.docx')):

logging.info(f"Processing file: {file_path}")

lang = detect_language(process_pdf(file_path) if file_path.endswith('.pdf') else process_docx(file_path))

if lang.startswith('zh'):

    translate_and_save_file(file_path)

this is the error:

2024-03-17 00:51:32,444 - ERROR - Failed to translate and save /xxx/xxx/xxx/xxx/xxx/xxxxxx.pdf: (InvalidDocumentAccessLevel): Cannot access source document location with the current permissions.
Azure Translator
Azure Translator
An Azure service to easily conduct machine translation with a simple REST API call.
408 questions
{count} vote

Your answer

Answers can be marked as Accepted Answers by the question author, which helps users to know the answer solved the author's problem.