Azure AI Vision Image Analysis SDK for Java
Build image analysis applications using the Azure AI Vision Image Analysis SDK for Java.
Installation
<dependency> <groupId>com.azure</groupId> <artifactId>azure-ai-vision-imageanalysis</artifactId> <version>1.1.0-beta.1</version> </dependency>
Client Creation
With API Key
import com.azure.ai.vision.imageanalysis.ImageAnalysisClient; import com.azure.ai.vision.imageanalysis.ImageAnalysisClientBuilder; import com.azure.core.credential.KeyCredential;
String endpoint = System.getenv("VISION_ENDPOINT"); String key = System.getenv("VISION_KEY");
ImageAnalysisClient client = new ImageAnalysisClientBuilder() .endpoint(endpoint) .credential(new KeyCredential(key)) .buildClient();
Async Client
import com.azure.ai.vision.imageanalysis.ImageAnalysisAsyncClient;
ImageAnalysisAsyncClient asyncClient = new ImageAnalysisClientBuilder() .endpoint(endpoint) .credential(new KeyCredential(key)) .buildAsyncClient();
With DefaultAzureCredential
import com.azure.identity.DefaultAzureCredentialBuilder;
ImageAnalysisClient client = new ImageAnalysisClientBuilder() .endpoint(endpoint) .credential(new DefaultAzureCredentialBuilder().build()) .buildClient();
Visual Features
Feature Description
CAPTION
Generate human-readable image description
DENSE_CAPTIONS
Captions for up to 10 regions
READ
OCR - Extract text from images
TAGS
Content tags for objects, scenes, actions
OBJECTS
Detect objects with bounding boxes
SMART_CROPS
Smart thumbnail regions
PEOPLE
Detect people with locations
Core Patterns
Generate Caption
import com.azure.ai.vision.imageanalysis.models.*; import com.azure.core.util.BinaryData; import java.io.File; import java.util.Arrays;
// From file: load the image bytes from a local path.
BinaryData imageData = BinaryData.fromFile(new File("image.jpg").toPath());

// Request only the CAPTION feature, with gender-neutral wording enabled.
ImageAnalysisResult result = client.analyze(
    imageData,
    Arrays.asList(VisualFeatures.CAPTION),
    new ImageAnalysisOptions().setGenderNeutralCaption(true));

// The inner quotes are escaped so the format string remains a single literal.
System.out.printf("Caption: \"%s\" (confidence: %.4f)%n",
    result.getCaption().getText(),
    result.getCaption().getConfidence());
Generate Caption from URL
// Analyze a publicly reachable image URL instead of local bytes.
ImageAnalysisResult result = client.analyzeFromUrl(
    "https://example.com/image.jpg",
    Arrays.asList(VisualFeatures.CAPTION),
    new ImageAnalysisOptions().setGenderNeutralCaption(true));

// The inner quotes are escaped so the format string remains a single literal.
System.out.printf("Caption: \"%s\"%n", result.getCaption().getText());
Extract Text (OCR)
// Load the document image and request only the READ (OCR) feature.
BinaryData documentImage = BinaryData.fromFile(new File("document.jpg").toPath());
ImageAnalysisResult result = client.analyze(
    documentImage,
    Arrays.asList(VisualFeatures.READ),
    null);

// Walk the OCR hierarchy: blocks contain lines, lines contain words.
for (DetectedTextBlock textBlock : result.getRead().getBlocks()) {
    for (DetectedTextLine textLine : textBlock.getLines()) {
        System.out.printf("Line: '%s'%n", textLine.getText());
        System.out.printf(" Bounding polygon: %s%n", textLine.getBoundingPolygon());

        for (DetectedTextWord textWord : textLine.getWords()) {
            System.out.printf(" Word: '%s' (confidence: %.4f)%n",
                textWord.getText(),
                textWord.getConfidence());
        }
    }
}
Detect Objects
// Request only the OBJECTS feature; no extra options are passed.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.OBJECTS),
    null);

// getObjects() returns an ObjectsResult wrapper; the detections are in getValues().
for (DetectedObject obj : result.getObjects().getValues()) {
    // The first tag of each detected object is used as its label.
    DetectedTag primaryTag = obj.getTags().get(0);
    System.out.printf("Object: %s (confidence: %.4f)%n",
        primaryTag.getName(),
        primaryTag.getConfidence());

    ImageBoundingBox box = obj.getBoundingBox();
    System.out.printf(" Location: x=%d, y=%d, w=%d, h=%d%n",
        box.getX(), box.getY(), box.getWidth(), box.getHeight());
}
Get Tags
// Request only the TAGS feature; no extra options are passed.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.TAGS),
    null);

// getTags() returns a TagsResult wrapper; the tag list is in getValues().
for (DetectedTag tag : result.getTags().getValues()) {
    System.out.printf("Tag: %s (confidence: %.4f)%n",
        tag.getName(),
        tag.getConfidence());
}
Detect People
// Request only the PEOPLE feature; no extra options are passed.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.PEOPLE),
    null);

// getPeople() returns a PeopleResult wrapper; the detections are in getValues().
for (DetectedPerson person : result.getPeople().getValues()) {
    ImageBoundingBox box = person.getBoundingBox();
    System.out.printf("Person at x=%d, y=%d (confidence: %.4f)%n",
        box.getX(), box.getY(), person.getConfidence());
}
Smart Cropping
// Request SMART_CROPS and pass the desired aspect ratios (width / height).
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.SMART_CROPS),
    new ImageAnalysisOptions().setSmartCropsAspectRatios(Arrays.asList(1.0, 1.5)));

// getSmartCrops() returns a SmartCropsResult wrapper; the regions are in getValues().
for (CropRegion crop : result.getSmartCrops().getValues()) {
    ImageBoundingBox box = crop.getBoundingBox();
    System.out.printf("Crop region: aspect=%.2f, x=%d, y=%d, w=%d, h=%d%n",
        crop.getAspectRatio(),
        box.getX(), box.getY(), box.getWidth(), box.getHeight());
}
Dense Captions
// Request DENSE_CAPTIONS (per-region captions) with gender-neutral wording.
ImageAnalysisResult result = client.analyzeFromUrl(
    imageUrl,
    Arrays.asList(VisualFeatures.DENSE_CAPTIONS),
    new ImageAnalysisOptions().setGenderNeutralCaption(true));

// getDenseCaptions() returns a DenseCaptionsResult wrapper; the captions are in getValues().
for (DenseCaption caption : result.getDenseCaptions().getValues()) {
    // The inner quotes are escaped so the format string remains a single literal.
    System.out.printf("Caption: \"%s\" (confidence: %.4f)%n",
        caption.getText(),
        caption.getConfidence());

    ImageBoundingBox box = caption.getBoundingBox();
    System.out.printf(" Region: x=%d, y=%d, w=%d, h=%d%n",
        box.getX(), box.getY(), box.getWidth(), box.getHeight());
}
Multiple Features
ImageAnalysisResult result = client.analyzeFromUrl( imageUrl, Arrays.asList( VisualFeatures.CAPTION, VisualFeatures.TAGS, VisualFeatures.OBJECTS, VisualFeatures.READ), new ImageAnalysisOptions() .setGenderNeutralCaption(true) .setLanguage("en"));
// Access all results System.out.println("Caption: " + result.getCaption().getText()); System.out.println("Tags: " + result.getTags().size()); System.out.println("Objects: " + result.getObjects().size()); System.out.println("Text blocks: " + result.getRead().getBlocks().size());
Async Analysis
// Start the analysis without blocking; the three callbacks handle the
// result, a failure, and completion respectively.
asyncClient
    .analyzeFromUrl(imageUrl, Arrays.asList(VisualFeatures.CAPTION), null)
    .subscribe(
        analysis -> {
            System.out.println("Caption: " + analysis.getCaption().getText());
        },
        failure -> {
            System.err.println("Error: " + failure.getMessage());
        },
        () -> {
            System.out.println("Complete");
        });
Error Handling
import com.azure.core.exception.HttpResponseException;
try { client.analyzeFromUrl(imageUrl, Arrays.asList(VisualFeatures.CAPTION), null); } catch (HttpResponseException e) { System.out.println("Status: " + e.getResponse().getStatusCode()); System.out.println("Error: " + e.getMessage()); }
Environment Variables
VISION_ENDPOINT=https://<resource>.cognitiveservices.azure.com/
VISION_KEY=<your-api-key>
Image Requirements
- Formats: JPEG, PNG, GIF, BMP, WEBP, ICO, TIFF, MPO
- Size: < 20 MB
- Dimensions: 50x50 to 16000x16000 pixels
Regional Availability
Caption and Dense Captions require GPU-supported regions. Check supported regions before deployment.
Trigger Phrases
- "image analysis Java"
- "Azure Vision SDK"
- "image captioning"
- "OCR image text extraction"
- "object detection image"
- "smart crop thumbnail"
- "detect people image"