Azure AI Document Translation SDK for Python
Client library for the Azure AI Translator document translation service, which translates documents in batches while preserving their original format.
Installation
pip install azure-ai-translation-document
Environment Variables
AZURE_DOCUMENT_TRANSLATION_ENDPOINT=https://<resource>.cognitiveservices.azure.com AZURE_DOCUMENT_TRANSLATION_KEY=<your-api-key> # If using API key
Storage for source and target documents
AZURE_SOURCE_CONTAINER_URL=https://<storage>.blob.core.windows.net/<container>?<sas> AZURE_TARGET_CONTAINER_URL=https://<storage>.blob.core.windows.net/<container>?<sas>
Authentication
API Key
import os from azure.ai.translation.document import DocumentTranslationClient from azure.core.credentials import AzureKeyCredential
endpoint = os.environ["AZURE_DOCUMENT_TRANSLATION_ENDPOINT"] key = os.environ["AZURE_DOCUMENT_TRANSLATION_KEY"]
client = DocumentTranslationClient(endpoint, AzureKeyCredential(key))
Entra ID (Recommended)
from azure.ai.translation.document import DocumentTranslationClient from azure.identity import DefaultAzureCredential
client = DocumentTranslationClient( endpoint=os.environ["AZURE_DOCUMENT_TRANSLATION_ENDPOINT"], credential=DefaultAzureCredential() )
Basic Document Translation
from azure.ai.translation.document import DocumentTranslationInput, TranslationTarget
source_url = os.environ["AZURE_SOURCE_CONTAINER_URL"] target_url = os.environ["AZURE_TARGET_CONTAINER_URL"]
Start translation job
poller = client.begin_translation( inputs=[ DocumentTranslationInput( source_url=source_url, targets=[ TranslationTarget( target_url=target_url, language="es" # Translate to Spanish ) ] ) ] )
Wait for completion
result = poller.result()
print(f"Status: {poller.status()}") print(f"Documents translated: {poller.details.documents_succeeded_count}") print(f"Documents failed: {poller.details.documents_failed_count}")
Multiple Target Languages
poller = client.begin_translation( inputs=[ DocumentTranslationInput( source_url=source_url, targets=[ TranslationTarget(target_url=target_url_es, language="es"), TranslationTarget(target_url=target_url_fr, language="fr"), TranslationTarget(target_url=target_url_de, language="de") ] ) ] )
Translate Single Document
from azure.ai.translation.document import SingleDocumentTranslationClient
single_client = SingleDocumentTranslationClient(endpoint, AzureKeyCredential(key))
with open("document.docx", "rb") as f: document_content = f.read()
result = single_client.translate( body=document_content, target_language="es", content_type="application/vnd.openxmlformats-officedocument.wordprocessingml.document" )
Save translated document
with open("document_es.docx", "wb") as f: f.write(result)
Check Translation Status
Get all translation operations
operations = client.list_translation_statuses()
for op in operations: print(f"Operation ID: {op.id}") print(f"Status: {op.status}") print(f"Created: {op.created_on}") print(f"Total documents: {op.documents_total_count}") print(f"Succeeded: {op.documents_succeeded_count}") print(f"Failed: {op.documents_failed_count}")
List Document Statuses
Get status of individual documents in a job
operation_id = poller.id document_statuses = client.list_document_statuses(operation_id)
for doc in document_statuses: print(f"Document: {doc.source_document_url}") print(f" Status: {doc.status}") print(f" Translated to: {doc.translated_to}") if doc.error: print(f" Error: {doc.error.message}")
Cancel Translation
Cancel a running translation
client.cancel_translation(operation_id)
Using Glossary
from azure.ai.translation.document import TranslationGlossary
poller = client.begin_translation( inputs=[ DocumentTranslationInput( source_url=source_url, targets=[ TranslationTarget( target_url=target_url, language="es", glossaries=[ TranslationGlossary( glossary_url="https://<storage>.blob.core.windows.net/glossary/terms.csv?<sas>", file_format="csv" ) ] ) ] ) ] )
Supported Document Formats
Get supported formats
formats = client.get_supported_document_formats()
for fmt in formats: print(f"Format: {fmt.format}") print(f" Extensions: {fmt.file_extensions}") print(f" Content types: {fmt.content_types}")
Supported Languages
Get supported languages
languages = client.get_supported_languages()
for lang in languages: print(f"Language: {lang.name} ({lang.code})")
Async Client
from azure.ai.translation.document.aio import DocumentTranslationClient from azure.identity.aio import DefaultAzureCredential
async def translate_documents(): async with DocumentTranslationClient( endpoint=endpoint, credential=DefaultAzureCredential() ) as client: poller = await client.begin_translation(inputs=[...]) result = await poller.result()
Supported Formats
| Category | Formats |
|---|---|
| Documents | DOCX, PDF, PPTX, XLSX, HTML, TXT, RTF |
| Structured | CSV, TSV, JSON, XML |
| Localization | XLIFF, XLF, MHTML |
Storage Requirements
- Source and target containers must be Azure Blob Storage
- Use SAS tokens with appropriate permissions:
  - Source: Read, List
  - Target: Write, List
Best Practices
- Use SAS tokens with minimal required permissions
- Monitor long-running operations with poller.status()
- Handle document-level errors by iterating document statuses
- Use glossaries for domain-specific terminology
- Separate target containers for each language
- Use async client for multiple concurrent jobs
- Check supported formats before submitting documents