Azure AI Vision Image Analysis SDK for Python
Client library for Azure AI Vision 4.0 image analysis including captions, tags, objects, OCR, and more.
Installation
pip install azure-ai-vision-imageanalysis
Environment Variables
VISION_ENDPOINT=https://<resource>.cognitiveservices.azure.com
VISION_KEY=<your-api-key>  # Only required when using API key authentication
Authentication
API Key
import os from azure.ai.vision.imageanalysis import ImageAnalysisClient from azure.core.credentials import AzureKeyCredential
endpoint = os.environ["VISION_ENDPOINT"] key = os.environ["VISION_KEY"]
client = ImageAnalysisClient( endpoint=endpoint, credential=AzureKeyCredential(key) )
Entra ID (Recommended)
from azure.ai.vision.imageanalysis import ImageAnalysisClient from azure.identity import DefaultAzureCredential
client = ImageAnalysisClient( endpoint=os.environ["VISION_ENDPOINT"], credential=DefaultAzureCredential() )
Analyze Image from URL
from azure.ai.vision.imageanalysis.models import VisualFeatures
image_url = "https://example.com/image.jpg"
result = client.analyze_from_url( image_url=image_url, visual_features=[ VisualFeatures.CAPTION, VisualFeatures.TAGS, VisualFeatures.OBJECTS, VisualFeatures.READ, VisualFeatures.PEOPLE, VisualFeatures.SMART_CROPS, VisualFeatures.DENSE_CAPTIONS ], gender_neutral_caption=True, language="en" )
Analyze Image from File
with open("image.jpg", "rb") as f: image_data = f.read()
result = client.analyze( image_data=image_data, visual_features=[VisualFeatures.CAPTION, VisualFeatures.TAGS] )
Image Caption
result = client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.CAPTION], gender_neutral_caption=True )
if result.caption: print(f"Caption: {result.caption.text}") print(f"Confidence: {result.caption.confidence:.2f}")
Dense Captions (Multiple Regions)
result = client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.DENSE_CAPTIONS] )
if result.dense_captions: for caption in result.dense_captions.list: print(f"Caption: {caption.text}") print(f" Confidence: {caption.confidence:.2f}") print(f" Bounding box: {caption.bounding_box}")
Tags
result = client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.TAGS] )
if result.tags: for tag in result.tags.list: print(f"Tag: {tag.name} (confidence: {tag.confidence:.2f})")
Object Detection
result = client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.OBJECTS] )
if result.objects: for obj in result.objects.list: print(f"Object: {obj.tags[0].name}") print(f" Confidence: {obj.tags[0].confidence:.2f}") box = obj.bounding_box print(f" Bounding box: x={box.x}, y={box.y}, w={box.width}, h={box.height}")
OCR (Text Extraction)
result = client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.READ] )
if result.read: for block in result.read.blocks: for line in block.lines: print(f"Line: {line.text}") print(f" Bounding polygon: {line.bounding_polygon}")
# Word-level details
for word in line.words:
print(f" Word: {word.text} (confidence: {word.confidence:.2f})")
People Detection
result = client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.PEOPLE] )
if result.people: for person in result.people.list: print(f"Person detected:") print(f" Confidence: {person.confidence:.2f}") box = person.bounding_box print(f" Bounding box: x={box.x}, y={box.y}, w={box.width}, h={box.height}")
Smart Cropping
result = client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.SMART_CROPS], smart_crops_aspect_ratios=[0.9, 1.33, 1.78] # Portrait, 4:3, 16:9 )
if result.smart_crops: for crop in result.smart_crops.list: print(f"Aspect ratio: {crop.aspect_ratio}") box = crop.bounding_box print(f" Crop region: x={box.x}, y={box.y}, w={box.width}, h={box.height}")
Async Client
from azure.ai.vision.imageanalysis.aio import ImageAnalysisClient from azure.identity.aio import DefaultAzureCredential
async def analyze_image(): async with ImageAnalysisClient( endpoint=endpoint, credential=DefaultAzureCredential() ) as client: result = await client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.CAPTION] ) print(result.caption.text)
Visual Features
| Feature | Description |
|---------|-------------|
| CAPTION | Single sentence describing the image |
| DENSE_CAPTIONS | Captions for multiple regions of the image |
| TAGS | Content tags (objects, scenes, actions) |
| OBJECTS | Object detection with bounding boxes |
| READ | OCR text extraction |
| PEOPLE | People detection with bounding boxes |
| SMART_CROPS | Suggested crop regions for thumbnails |
Error Handling
from azure.core.exceptions import HttpResponseError
try: result = client.analyze_from_url( image_url=image_url, visual_features=[VisualFeatures.CAPTION] ) except HttpResponseError as e: print(f"Status code: {e.status_code}") print(f"Reason: {e.reason}") print(f"Message: {e.error.message}")
Image Requirements
- Formats: JPEG, PNG, GIF, BMP, WEBP, ICO, TIFF, MPO
- Max size: 20 MB
- Dimensions: 50x50 to 16000x16000 pixels
Best Practices
- Select only the visual features you need to optimize latency and cost
- Use the async client for high-throughput scenarios
- Handle HttpResponseError for invalid images or authentication issues
- Enable gender_neutral_caption for inclusive descriptions
- Specify language for localized captions
- Use smart_crops_aspect_ratios values matching your thumbnail requirements
- Cache results when analyzing the same image multiple times