AI Generation Client
Robust AI API integration with retry logic and content safety.
When to Use This Skill
- Integrating with AI generation APIs (Gemini, OpenAI, etc.)
- Needing retry logic for flaky AI services
- Handling rate limits gracefully
- Detecting content policy violations
- Supporting multi-turn refinements
Core Concepts
AI API integration requires:
- Exponential backoff - Retry with increasing delays
- Rate limit handling - Respect Retry-After headers
- Content safety - Detect and handle policy violations
- Multi-turn context - Enable cheaper refinements
Implementation
Python
import asyncio import base64 import time import uuid from dataclasses import dataclass from typing import Optional, List import aiohttp
@dataclass
class GenerationRequest:
    """Parameters for one image-generation call.

    Only ``prompt``, ``width`` and ``height`` are required; every other
    field has a default.
    """

    # Text prompt describing what to generate (or how to refine it).
    prompt: str
    # Requested output size in pixels; appended to the prompt text.
    width: int
    height: int
    # Model name interpolated into the generateContent URL.
    model: str = "gemini-2.0-flash-exp"
    # Seed reported back on the response; a time-based value is used if unset.
    seed: Optional[int] = None
    # Raw image bytes sent as inline ``image/png`` data alongside the prompt.
    input_image: Optional[bytes] = None
    # Prior turns as dicts with optional "role", "text" and "image_data" keys;
    # when non-empty the request is built as a multi-turn refinement.
    conversation_history: Optional[List[dict]] = None
@dataclass
class GenerationResponse:
    """Result of a successful generation call."""

    # Decoded bytes of the first inline image part in the API response.
    image_data: bytes
    # Client-side UUID string assigned to this call.
    generation_id: str
    # Seed value associated with this call.
    seed: int
    # Milliseconds elapsed between sending the request and receiving a response.
    inference_time_ms: int
    # Decoded ``thoughtSignature`` of the image part, when the API returned one.
    thought_signature: Optional[bytes] = None
class RateLimitError(Exception):
    """Raised when the API reports that the rate limit was exceeded (HTTP 429).

    Args:
        retry_after: Seconds the caller should wait before retrying.

    Attributes:
        retry_after: The wait time, in seconds, passed to the constructor.
    """

    def __init__(self, retry_after: int = 60):
        # Pass a message to Exception so str(error) and tracebacks are useful.
        super().__init__(f"Rate limit exceeded. Retry after {retry_after} seconds.")
        self.retry_after = retry_after
class ContentPolicyError(Exception):
    """Raised when a request is rejected for violating content policies.

    Args:
        reason: Description of the violation (typically the API error payload).

    Attributes:
        reason: The description passed to the constructor.
    """

    def __init__(self, reason: str = "Content violates usage policies"):
        # Pass a message to Exception so str(error) and tracebacks are useful.
        super().__init__(f"Content policy violation: {reason}")
        self.reason = reason
class GenerationError(Exception):
    """Raised for generation failures other than rate limits or policy blocks.

    Args:
        message: Human-readable description of the failure.
        details: Optional extra context; stored as an empty dict when omitted.

    Attributes:
        message: The description passed to the constructor.
        details: The context dict (never ``None``).
    """

    def __init__(self, message: str, details: Optional[dict] = None):
        # Pass the message to Exception so str(error) and tracebacks are useful.
        super().__init__(message)
        self.message = message
        self.details = details or {}
class AIGenerationClient:
    """Async client for AI generation APIs with retry logic.

    Sends requests to the ``generateContent`` endpoint, retries transient
    failures with exponential backoff, waits for the server-provided delay
    on rate limits, and never retries content policy violations.

    Args:
        api_key: Key sent in the ``x-goog-api-key`` request header.
        timeout: Total timeout per HTTP request, in seconds.
        max_retries: Maximum number of attempts per ``generate`` call;
            capped at ``len(RETRY_DELAYS)``.
    """

    RETRY_DELAYS = [1, 2, 4]  # Exponential backoff, seconds between attempts
    DEFAULT_RETRY_AFTER = 60  # Seconds to wait when Retry-After is missing/unparseable
    BASE_URL = "https://generativelanguage.googleapis.com/v1beta"
    # Prepended verbatim to every single-turn prompt.
    STRICT_CONSTRAINT = """STRICT RULES:
- CREATE ORIGINAL ART - Do NOT use screenshots or existing images.
- TEXT RENDERING - Render ALL text EXACTLY as written.
- QUANTITIES - If prompt says "3 items" render EXACTLY 3.
- NO ADDITIONS - Do NOT add elements not mentioned.

"""

    def __init__(self, api_key: str, timeout: int = 120, max_retries: int = 3):
        self.api_key = api_key
        self.timeout = timeout
        # Each attempt indexes RETRY_DELAYS, so never allow more attempts
        # than there are configured delays.
        self.max_retries = min(max_retries, len(self.RETRY_DELAYS))
        # Created lazily by _get_session().
        self._session: "Optional[aiohttp.ClientSession]" = None

    async def _get_session(self) -> "aiohttp.ClientSession":
        """Return the shared HTTP session, creating it if missing or closed."""
        if self._session is None or self._session.closed:
            self._session = aiohttp.ClientSession(
                timeout=aiohttp.ClientTimeout(total=self.timeout)
            )
        return self._session

    async def close(self):
        """Close the underlying HTTP session if one is open."""
        if self._session and not self._session.closed:
            await self._session.close()

    async def generate(self, request: "GenerationRequest") -> "GenerationResponse":
        """Generate with exponential backoff retry.

        Args:
            request: The generation parameters.

        Returns:
            The response of the first successful attempt.

        Raises:
            ContentPolicyError: Immediately, without retrying.
            RateLimitError: If the final attempt is rate limited.
            GenerationError: If the final attempt fails, or if no attempt
                was made at all.
            asyncio.TimeoutError: If the final attempt times out.
            aiohttp.ClientError: If the final attempt fails at the
                connection level.
        """
        last_exception = None
        final_attempt = self.max_retries - 1

        for attempt in range(self.max_retries):
            try:
                return await self._execute_generation(request)
            except ContentPolicyError:
                raise  # Don't retry content policy violations
            except RateLimitError as e:
                last_exception = e
                if attempt >= final_attempt:
                    raise
                # Prefer the server-provided wait; fall back to backoff
                # when it is zero or missing.
                await asyncio.sleep(e.retry_after or self.RETRY_DELAYS[attempt])
            except (GenerationError, asyncio.TimeoutError, aiohttp.ClientError) as e:
                # Connection-level failures are as transient as timeouts,
                # so they go through the same backoff path.
                last_exception = e
                if attempt >= final_attempt:
                    raise
                await asyncio.sleep(self.RETRY_DELAYS[attempt])

        # Only reachable when max_retries <= 0 (the loop body never ran).
        raise last_exception or GenerationError("Generation failed after all retries")

    @staticmethod
    def _parse_retry_after(value, default: int = DEFAULT_RETRY_AFTER) -> int:
        """Convert a Retry-After header value to whole seconds.

        The header may be absent or may carry an HTTP-date instead of a
        number; both cases yield ``default`` instead of raising.
        """
        try:
            seconds = int(value)
        except (TypeError, ValueError):
            return default
        return max(seconds, 0)

    async def _execute_generation(self, request: "GenerationRequest") -> "GenerationResponse":
        """Perform a single API call and translate the HTTP outcome.

        Raises:
            RateLimitError: On HTTP 429.
            ContentPolicyError: On HTTP 400 whose payload mentions
                "safety", "blocked" or "policy".
            GenerationError: On any other non-200 status, or when the
                response carries no image.
        """
        generation_id = str(uuid.uuid4())
        # Compare against None so that an explicit seed of 0 is honoured.
        if request.seed is not None:
            used_seed = request.seed
        else:
            used_seed = int(time.time() * 1000) % (2**31)
        start_time = time.time()

        if request.conversation_history:
            # Handle multi-turn conversation
            contents = self._build_multi_turn(
                request.conversation_history,
                request.prompt,
                request.width,
                request.height,
            )
        else:
            # Build prompt with constraints
            constrained_prompt = (
                f"{self.STRICT_CONSTRAINT}{request.prompt}\n\n"
                f"Generate as {request.width}x{request.height} pixels."
            )
            parts = []
            if request.input_image:
                parts.append({
                    "inlineData": {
                        "mimeType": "image/png",
                        "data": base64.b64encode(request.input_image).decode(),
                    }
                })
            parts.append({"text": constrained_prompt})
            contents = [{"parts": parts}]

        blocked_categories = (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
        request_body = {
            "contents": contents,
            "generationConfig": {
                "responseModalities": ["IMAGE", "TEXT"],
            },
            "safetySettings": [
                {"category": category, "threshold": "BLOCK_MEDIUM_AND_ABOVE"}
                for category in blocked_categories
            ],
        }

        url = f"{self.BASE_URL}/models/{request.model}:generateContent"
        headers = {"Content-Type": "application/json", "x-goog-api-key": self.api_key}

        session = await self._get_session()
        async with session.post(url, json=request_body, headers=headers) as response:
            inference_time_ms = int((time.time() - start_time) * 1000)

            if response.status == 200:
                data = await response.json()
                image_data, thought_sig = self._extract_image(data)
                return GenerationResponse(
                    image_data=image_data,
                    generation_id=generation_id,
                    seed=used_seed,
                    inference_time_ms=inference_time_ms,
                    thought_signature=thought_sig,
                )

            if response.status == 429:
                retry_after = self._parse_retry_after(response.headers.get("Retry-After"))
                raise RateLimitError(retry_after=retry_after)

            if response.status == 400:
                error_data = await response.json()
                error_str = str(error_data).lower()
                if any(term in error_str for term in ["safety", "blocked", "policy"]):
                    raise ContentPolicyError(reason=str(error_data))
                raise GenerationError(f"Bad request: {error_data}")

            error_text = await response.text()
            raise GenerationError(f"API error {response.status}: {error_text}")

    def _build_multi_turn(self, history: List[dict], prompt: str, width: int, height: int) -> List[dict]:
        """Convert prior turns plus a refinement prompt into API ``contents``.

        Each history turn may carry "text" and/or "image_data" (raw bytes,
        or an already base64-encoded string); turns with neither are
        skipped. A final user turn with the refinement prompt is appended.
        """
        contents = []
        for turn in history:
            parts = []
            if turn.get("text"):
                parts.append({"text": turn["text"]})
            if turn.get("image_data"):
                image = turn["image_data"]
                # Bytes need encoding; anything else is passed through as-is.
                image_b64 = base64.b64encode(image).decode() if isinstance(image, bytes) else image
                parts.append({"inlineData": {"mimeType": "image/png", "data": image_b64}})
            if parts:
                contents.append({"role": turn.get("role", "user"), "parts": parts})

        contents.append({
            "role": "user",
            "parts": [{"text": f"Refinement: {prompt}\n\nKeep at {width}x{height} pixels."}],
        })
        return contents

    def _extract_image(self, data: dict) -> tuple:
        """Return ``(image_bytes, thought_signature)`` from an API response.

        Only the first candidate is inspected, and the first part carrying
        inline data wins. ``thought_signature`` is ``None`` when that part
        has no "thoughtSignature" field.

        Raises:
            GenerationError: If there are no candidates or no image part.
        """
        candidates = data.get("candidates", [])
        if not candidates:
            raise GenerationError("No image generated")

        parts = candidates[0].get("content", {}).get("parts", [])
        for part in parts:
            if "inlineData" in part and "data" in part["inlineData"]:
                image_data = base64.b64decode(part["inlineData"]["data"])
                thought_sig = (
                    base64.b64decode(part["thoughtSignature"])
                    if "thoughtSignature" in part
                    else None
                )
                return image_data, thought_sig
        raise GenerationError("No image data in response")
TypeScript
/** Parameters for one image-generation call. */
interface GenerationRequest {
  /** Text prompt describing what to generate. */
  prompt: string;
  width: number;
  height: number;
  /** Model name used in the request URL; the client supplies a default. */
  model?: string;
  seed?: number;
  /** Image sent as inline base64 `image/png` data alongside the prompt. */
  inputImage?: Buffer;
  /** Prior turns of the conversation. */
  conversationHistory?: Array<{
    role: string;
    text?: string;
    imageData?: Buffer;
  }>;
}
/** Result of a successful generation call. */
interface GenerationResponse {
  /** Decoded image bytes. */
  imageData: Buffer;
  /** Client-side identifier assigned to this call. */
  generationId: string;
  seed: number;
  /** Milliseconds between sending the request and receiving the response. */
  inferenceTimeMs: number;
  thoughtSignature?: Buffer;
}
/**
 * Thrown when the API reports that the rate limit was exceeded (HTTP 429).
 *
 * `retryAfter` is the number of seconds to wait before retrying.
 */
class RateLimitError extends Error {
  constructor(public retryAfter: number = 60) {
    // The message must be a template literal for ${retryAfter} to interpolate.
    super(`Rate limit exceeded. Retry after ${retryAfter} seconds.`);
    this.name = 'RateLimitError';
  }
}
/**
 * Thrown when a request is rejected for violating content policies.
 *
 * `reason` describes the violation (typically the serialized API error).
 */
class ContentPolicyError extends Error {
  constructor(public reason: string = "Content violates usage policies") {
    // The message must be a template literal for ${reason} to interpolate.
    super(`Content policy violation: ${reason}`);
    this.name = 'ContentPolicyError';
  }
}
/**
 * Client for AI generation APIs with retry logic.
 *
 * Retries failed attempts with exponential backoff, waits for the
 * server-provided delay on rate limits, and never retries content policy
 * violations.
 */
class AIGenerationClient {
  /** Backoff delays in milliseconds, indexed by attempt number. */
  private static RETRY_DELAYS = [1000, 2000, 4000];
  /** Seconds to wait when Retry-After is missing or not a number. */
  private static DEFAULT_RETRY_AFTER_SECONDS = 60;
  /** Substrings of a 400 payload that mark a content policy rejection. */
  private static POLICY_TERMS = ['safety', 'blocked', 'policy'];

  private maxRetries: number;

  /**
   * @param apiKey Key sent in the `x-goog-api-key` header.
   * @param timeout Per-request timeout in milliseconds.
   * @param maxRetries Maximum attempts per call; capped at the number of
   *   configured backoff delays.
   */
  constructor(
    private apiKey: string,
    private timeout: number = 120000,
    maxRetries: number = 3
  ) {
    // Each attempt indexes RETRY_DELAYS; an uncapped value would read
    // `undefined` and retry with no delay at all.
    this.maxRetries = Math.min(maxRetries, AIGenerationClient.RETRY_DELAYS.length);
  }

  /**
   * Generate with exponential backoff retry.
   *
   * @throws ContentPolicyError immediately, without retrying.
   * @throws The last error once all attempts are exhausted.
   */
  async generate(request: GenerationRequest): Promise<GenerationResponse> {
    let lastError: Error | null = null;

    for (let attempt = 0; attempt < this.maxRetries; attempt++) {
      try {
        return await this.executeGeneration(request);
      } catch (error) {
        if (error instanceof ContentPolicyError) throw error;
        lastError = error as Error;

        if (attempt < this.maxRetries - 1) {
          // Prefer the server-provided wait; fall back to backoff when it
          // is zero.
          const delay =
            error instanceof RateLimitError && error.retryAfter > 0
              ? error.retryAfter * 1000
              : AIGenerationClient.RETRY_DELAYS[attempt];
          await new Promise(resolve => setTimeout(resolve, delay));
        }
      }
    }

    throw lastError || new Error('Generation failed after all retries');
  }

  /**
   * Convert a Retry-After header value to whole seconds.
   *
   * The header may be absent or carry an HTTP-date instead of a number;
   * both cases yield the default instead of NaN.
   */
  private static parseRetryAfter(header: string | null): number {
    const seconds = Number.parseInt(header ?? '', 10);
    return Number.isFinite(seconds) && seconds >= 0
      ? seconds
      : AIGenerationClient.DEFAULT_RETRY_AFTER_SECONDS;
  }

  /** Perform a single API call and translate the HTTP outcome. */
  private async executeGeneration(request: GenerationRequest): Promise<GenerationResponse> {
    const generationId = crypto.randomUUID();
    const seed = request.seed ?? Math.floor(Date.now() % (2 ** 31));
    const startTime = Date.now();

    const response = await fetch(
      `https://generativelanguage.googleapis.com/v1beta/models/${request.model || 'gemini-2.0-flash-exp'}:generateContent`,
      {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
          'x-goog-api-key': this.apiKey,
        },
        body: JSON.stringify(this.buildRequestBody(request)),
        signal: AbortSignal.timeout(this.timeout),
      }
    );

    const inferenceTimeMs = Date.now() - startTime;

    if (response.status === 429) {
      throw new RateLimitError(
        AIGenerationClient.parseRetryAfter(response.headers.get('Retry-After'))
      );
    }

    if (response.status === 400) {
      const error = await response.json();
      const serialized = JSON.stringify(error);
      const lowered = serialized.toLowerCase();
      if (AIGenerationClient.POLICY_TERMS.some(term => lowered.includes(term))) {
        throw new ContentPolicyError(serialized);
      }
      throw new Error(`Bad request: ${serialized}`);
    }

    if (!response.ok) {
      throw new Error(`API error ${response.status}`);
    }

    const data = await response.json();
    const imageData = this.extractImage(data);

    return { imageData, generationId, seed, inferenceTimeMs };
  }

  /** Build the JSON body: optional inline input image followed by the prompt. */
  private buildRequestBody(request: GenerationRequest): object {
    const parts: any[] = [];

    if (request.inputImage) {
      parts.push({
        inlineData: {
          mimeType: 'image/png',
          data: request.inputImage.toString('base64'),
        },
      });
    }
    parts.push({ text: request.prompt });

    return {
      contents: [{ parts }],
      generationConfig: { responseModalities: ['IMAGE', 'TEXT'] },
    };
  }

  /**
   * Return the decoded bytes of the first inline-data part of the first
   * candidate.
   *
   * @throws Error when the response carries no image part.
   */
  private extractImage(data: any): Buffer {
    const parts = data.candidates?.[0]?.content?.parts || [];
    for (const part of parts) {
      if (part.inlineData?.data) {
        return Buffer.from(part.inlineData.data, 'base64');
      }
    }
    throw new Error('No image data in response');
  }
}
Usage Examples
Basic Generation
client = AIGenerationClient(api_key="your-key")
response = await client.generate(GenerationRequest( prompt="A cute cartoon banana mascot waving", width=512, height=512, ))
response.image_data contains PNG bytes
Multi-Turn Refinement
First generation
response1 = await client.generate(GenerationRequest( prompt="Gaming thumbnail with bold text 'EPIC WIN'", width=1280, height=720, ))
Refinement (cheaper, uses context)
response2 = await client.generate(GenerationRequest( prompt="Make the text bigger and add more glow", width=1280, height=720, conversation_history=[ {"role": "user", "text": "Gaming thumbnail with bold text 'EPIC WIN'"}, {"role": "model", "image_data": response1.image_data}, ], ))
Best Practices
- Always use retry logic - AI APIs can be flaky
- Respect Retry-After headers for rate limits
- Don't retry content policy errors
- Use strict prompts to prevent hallucination
- Track generation IDs for debugging
- Set appropriate timeouts (30-120s)
Common Mistakes
- No retry logic (fails on transient errors)
- Retrying content policy violations (wastes quota)
- Ignoring Retry-After headers (gets blocked)
- No timeout (hangs forever)
- Missing generation ID logging
Related Patterns
- prompt-engine - Template-based prompt building
- rate-limiting - Protect your API quota
- circuit-breaker - Handle AI service outages