provenance-audit

AI Provenance & Audit Trail

Complete provenance tracking for AI-generated content with decision factors, data lineage, and cost tracking.

When to Use This Skill

Need to explain why AI made specific suggestions
Regulatory compliance requires audit trails
Want to track AI generation costs
Need confidence scoring for AI outputs
Building explainable AI systems

Core Concepts

Provenance tracking captures: decision factors (why), data lineage (from what), reasoning chain (how), confidence scoring, and generation metrics (cost/tokens).

Implementation

TypeScript

enum InsightType { CONTENT_SUGGESTION = 'content_suggestion', IMAGE_GENERATION = 'image_generation', TEXT_GENERATION = 'text_generation', }

enum ConfidenceLevel { VERY_HIGH = 'very_high', // 90-100% HIGH = 'high', // 75-89% MEDIUM = 'medium', // 50-74% LOW = 'low', // 25-49% VERY_LOW = 'very_low', // 0-24% }

interface DataSource { sourceType: string; // 'database', 'api', 'cache' sourceKey: string; // 'postgres:users', 'openai:gpt-4' recordsUsed: number; freshnessSeconds: number; qualityScore: number; // 0-1 sampleIds: string[]; }

interface DecisionFactor { factorName: string; rawValue: number; normalizedValue: number; // 0-1 weight: number; contribution: number; // normalizedValue * weight reasoning: string; }

interface ReasoningStep { stepNumber: number; operation: string; // 'filter', 'score', 'generate' description: string; inputCount: number; outputCount: number; algorithm: string; durationMs: number; }

interface GenerationMetrics { model: string; promptTokens: number; completionTokens: number; totalTokens: number; latencyMs: number; estimatedCostUsd: number; }

interface ProvenanceRecord { provenanceId: string; workerId: string; userId: string; insightType: InsightType; computedAt: Date; durationMs: number; insightId: string; insightSummary: string; confidenceScore: number; confidenceLevel: ConfidenceLevel; dataSources: DataSource[]; decisionFactors: DecisionFactor[]; reasoningChain: ReasoningStep[]; generationMetrics?: GenerationMetrics; validationPassed: boolean; tags: string[]; }

// Provenance Builder - Fluent API class ProvenanceBuilder { private record: Partial<ProvenanceRecord>; private startTime: number; private stepCounter = 0;

constructor(insightType: InsightType, workerId: string) { this.startTime = Date.now(); this.record = { provenanceId: crypto.randomUUID(), workerId, insightType, computedAt: new Date(), dataSources: [], decisionFactors: [], reasoningChain: [], tags: [], validationPassed: true, }; }

setUser(userId: string): this { this.record.userId = userId; return this; }

addDatabaseSource(table: string, recordsUsed: number, freshnessSeconds: number): this { this.record.dataSources!.push({ sourceType: 'database', sourceKey: postgres:${table}, recordsUsed, freshnessSeconds, qualityScore: 0.95, sampleIds: [], }); return this; }

addApiSource(provider: string, model: string): this { this.record.dataSources!.push({ sourceType: 'api', sourceKey: ${provider}:${model}, recordsUsed: 1, freshnessSeconds: 0, qualityScore: 0.9, sampleIds: [], }); return this; }

addFactor(name: string, rawValue: number, normalized: number, weight: number, reasoning: string): this { this.record.decisionFactors!.push({ factorName: name, rawValue, normalizedValue: normalized, weight, contribution: normalized * weight, reasoning, }); return this; }

addReasoningStep(operation: string, description: string, inputCount: number, outputCount: number, algorithm: string, durationMs = 0): this { this.stepCounter++; this.record.reasoningChain!.push({ stepNumber: this.stepCounter, operation, description, inputCount, outputCount, algorithm, durationMs, }); return this; }

setGenerationMetrics(metrics: GenerationMetrics): this { this.record.generationMetrics = metrics; return this; }

setInsight(id: string, summary: string): this { this.record.insightId = id; this.record.insightSummary = summary; return this; }

setConfidence(score: number): this { this.record.confidenceScore = score; this.record.confidenceLevel = getConfidenceLevel(score); return this; }

build(): ProvenanceRecord { this.record.durationMs = Date.now() - this.startTime; return this.record as ProvenanceRecord; } }

function getConfidenceLevel(score: number): ConfidenceLevel { const normalized = score <= 1 ? score * 100 : score; if (normalized >= 90) return ConfidenceLevel.VERY_HIGH; if (normalized >= 75) return ConfidenceLevel.HIGH; if (normalized >= 50) return ConfidenceLevel.MEDIUM; if (normalized >= 25) return ConfidenceLevel.LOW; return ConfidenceLevel.VERY_LOW; }

Usage Examples

async function generateWithProvenance(userId: string, topic: string) { const provenance = new ProvenanceBuilder(InsightType.CONTENT_SUGGESTION, 'content-worker') .setUser(userId);

// Step 1: Fetch data const data = await fetchTrending(topic); provenance .addDatabaseSource('trending_topics', data.length, 300) .addReasoningStep('filter', Fetched ${data.length} items for "${topic}", 0, data.length, 'sql_query');

// Step 2: Score const scored = scoreItems(data); provenance .addFactor('trend_velocity', scored.velocity, scored.normalizedVelocity, 0.4, Velocity of ${scored.velocity}/hr indicates ${scored.normalizedVelocity > 0.7 ? 'high' : 'moderate'} interest) .addReasoningStep('score', 'Calculated weighted scores', data.length, data.length, 'weighted_average');

// Step 3: Generate with AI const startGen = Date.now(); const result = await openai.chat.completions.create({ model: 'gpt-4', messages: [{ role: 'user', content: Suggest content for: ${topic} }], });

provenance .addApiSource('openai', 'gpt-4') .setGenerationMetrics({ model: 'gpt-4', promptTokens: result.usage?.prompt_tokens || 0, completionTokens: result.usage?.completion_tokens || 0, totalTokens: result.usage?.total_tokens || 0, latencyMs: Date.now() - startGen, estimatedCostUsd: calculateCost(result.usage), }) .addReasoningStep('generate', 'Generated suggestion using GPT-4', 1, 1, 'gpt-4', Date.now() - startGen);

// Finalize const suggestion = result.choices[0].message.content!; provenance .setInsight(crypto.randomUUID(), Content suggestion for "${topic}") .setConfidence(0.85);

const record = provenance.build(); await provenanceStore.save(record);

return { suggestion, provenanceId: record.provenanceId }; }

Best Practices

Start provenance builder at function entry to capture full duration
Record all data sources with freshness information
Include human-readable reasoning for each decision factor
Track token usage and costs for budget monitoring
Store sample IDs for audit trail verification

Common Mistakes

Not capturing all data sources used in decision
Missing reasoning explanations (just numbers)
Forgetting to track generation costs
Not persisting provenance records
Skipping confidence scoring

Related Patterns

ai-generation-client (generation execution)
ai-coaching (intent extraction)
logging-observability (general logging)

provenance-audit

Safety Notice

Copy this and send it to your AI assistant to learn

Source Transparency

Related Skills

webhook-security

row-level-security

audit-logging

oauth-social-login