Data Integrity Auditor
Detect and fix data integrity issues automatically.
Integrity Check Types
- Orphaned Records
-- Find orphaned orders (no matching user) SELECT o.id, o.user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL;
-- Find orphaned order items (no matching order) SELECT oi.id, oi.order_id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL;
- Broken Foreign Keys
// scripts/check-foreign-keys.ts async function checkForeignKeys() { const issues: string[] = [];
// Orders → Users
const orphanedOrders = await prisma.$queryRaw<any[]> SELECT o.id, o.user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL LIMIT 100 ;
if (orphanedOrders.length > 0) {
issues.push(
❌ Found ${orphanedOrders.length} orders with invalid user_id
);
console.log(
" Sample IDs:",
orphanedOrders.slice(0, 5).map((o) => o.id)
);
}
// Order Items → Orders
const orphanedItems = await prisma.$queryRaw<any[]> SELECT oi.id, oi.order_id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL LIMIT 100 ;
if (orphanedItems.length > 0) {
issues.push(
❌ Found ${orphanedItems.length} order items with invalid order_id
);
}
// Products → Categories
const orphanedProducts = await prisma.$queryRaw<any[]> SELECT p.id, p.category_id FROM products p LEFT JOIN categories c ON c.id = p.category_id WHERE p.category_id IS NOT NULL AND c.id IS NULL LIMIT 100 ;
if (orphanedProducts.length > 0) {
issues.push(
❌ Found ${orphanedProducts.length} products with invalid category_id
);
}
return issues; }
- Constraint Violations
async function checkConstraints() { const issues: string[] = [];
// Check email uniqueness (should be caught by DB, but verify)
const duplicateEmails = await prisma.$queryRaw<any[]> SELECT email, COUNT(*) as count FROM users GROUP BY email HAVING COUNT(*) > 1 ;
if (duplicateEmails.length > 0) {
issues.push(❌ Found ${duplicateEmails.length} duplicate emails);
}
// Check negative quantities
const negativeStock = await prisma.$queryRaw<any[]> SELECT id, name, stock FROM products WHERE stock < 0 ;
if (negativeStock.length > 0) {
issues.push(
❌ Found ${negativeStock.length} products with negative stock
);
}
// Check negative prices
const negativePrices = await prisma.$queryRaw<any[]> SELECT id, name, price FROM products WHERE price < 0 ;
if (negativePrices.length > 0) {
issues.push(
❌ Found ${negativePrices.length} products with negative prices
);
}
// Check invalid order status
const invalidStatus = await prisma.$queryRaw<any[]> SELECT id, status FROM orders WHERE status NOT IN ('pending', 'paid', 'shipped', 'delivered', 'cancelled') ;
if (invalidStatus.length > 0) {
issues.push(❌ Found ${invalidStatus.length} orders with invalid status);
}
return issues; }
- Missing Required Fields
async function checkMissingFields() { const issues: string[] = [];
// Users missing required fields const usersNoEmail = await prisma.user.count({ where: { email: null }, });
if (usersNoEmail > 0) {
issues.push(❌ Found ${usersNoEmail} users without email);
}
// Orders with NULL totals const ordersNoTotal = await prisma.order.count({ where: { total: null }, });
if (ordersNoTotal > 0) {
issues.push(❌ Found ${ordersNoTotal} orders without total);
}
return issues; }
Comprehensive Audit Script
// scripts/audit-data-integrity.ts interface IntegrityIssue { severity: "critical" | "warning" | "info"; category: string; message: string; count: number; query?: string; fix?: string; }
async function auditDataIntegrity(): Promise<IntegrityIssue[]> { const issues: IntegrityIssue[] = [];
console.log("🔍 Auditing data integrity...\n");
// 1. Check orphaned records
const orphanedOrders = await prisma.$queryRaw<any[]> SELECT COUNT(*) as count FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL ;
if (orphanedOrders[0].count > 0) { issues.push({ severity: "critical", category: "orphaned-records", message: "Orders with invalid user references", count: orphanedOrders[0].count, query: "SELECT id, user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL", fix: "DELETE FROM orders WHERE id IN (...)", }); }
// 2. Check duplicate unique constraints
const duplicateEmails = await prisma.$queryRaw<any[]> SELECT email, COUNT(*) as count FROM users GROUP BY email HAVING COUNT(*) > 1 ;
if (duplicateEmails.length > 0) { issues.push({ severity: "critical", category: "constraint-violation", message: "Duplicate email addresses", count: duplicateEmails.length, fix: "Keep newest record, delete duplicates", }); }
// 3. Check data inconsistencies
const invalidPrices = await prisma.$queryRaw<any[]> SELECT COUNT(*) as count FROM products WHERE price < 0 ;
if (invalidPrices[0].count > 0) { issues.push({ severity: "warning", category: "data-quality", message: "Products with negative prices", count: invalidPrices[0].count, fix: "UPDATE products SET price = ABS(price) WHERE price < 0", }); }
// 4. Check referential integrity
const brokenOrderItems = await prisma.$queryRaw<any[]> SELECT COUNT(*) as count FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL ;
if (brokenOrderItems[0].count > 0) { issues.push({ severity: "critical", category: "referential-integrity", message: "Order items referencing non-existent orders", count: brokenOrderItems[0].count, fix: "DELETE FROM order_items WHERE order_id NOT IN (SELECT id FROM orders)", }); }
return issues; }
async function generateReport() { const issues = await auditDataIntegrity();
console.log("\n📊 Data Integrity Report\n");
console.log(Total issues: ${issues.length}\n);
const grouped = issues.reduce((acc, issue) => { if (!acc[issue.severity]) acc[issue.severity] = []; acc[issue.severity].push(issue); return acc; }, {} as Record<string, IntegrityIssue[]>);
(["critical", "warning", "info"] as const).forEach((severity) => { const items = grouped[severity] || []; if (items.length === 0) return;
console.log(`\n${severity.toUpperCase()} (${items.length})\n`);
items.forEach((issue, i) => {
console.log(`${i + 1}. [${issue.category}] ${issue.message}`);
console.log(` Count: ${issue.count}`);
if (issue.query) {
console.log(` Query: ${issue.query.substring(0, 80)}...`);
}
if (issue.fix) {
console.log(` Fix: ${issue.fix}`);
}
console.log();
});
});
// Exit with error if critical issues process.exit(grouped.critical?.length > 0 ? 1 : 0); }
generateReport();
Automated Fixes
// scripts/fix-integrity-issues.ts async function fixOrphanedRecords() { console.log("🔧 Fixing orphaned records...\n");
// Delete orphaned orders
const deletedOrders = await prisma.$executeRaw DELETE FROM orders WHERE id IN ( SELECT o.id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL ) ;
console.log(✅ Deleted ${deletedOrders} orphaned orders);
// Delete orphaned order items
const deletedItems = await prisma.$executeRaw DELETE FROM order_items WHERE id IN ( SELECT oi.id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL ) ;
console.log(✅ Deleted ${deletedItems} orphaned order items);
}
async function fixDuplicates() { console.log("🔧 Fixing duplicate records...\n");
// Keep newest user, delete old duplicates
await prisma.$executeRaw DELETE FROM users WHERE id IN ( SELECT id FROM ( SELECT id, ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn FROM users ) t WHERE rn > 1 ) ;
console.log(✅ Fixed duplicate emails);
}
async function fixConstraintViolations() { console.log("🔧 Fixing constraint violations...\n");
// Fix negative prices
const fixedPrices = await prisma.$executeRaw UPDATE products SET price = ABS(price) WHERE price < 0 ;
console.log(✅ Fixed ${fixedPrices} negative prices);
// Fix negative stock
const fixedStock = await prisma.$executeRaw UPDATE products SET stock = 0 WHERE stock < 0 ;
console.log(✅ Fixed ${fixedStock} negative stock values);
}
Prevention: Add Missing Constraints
-- Migration to add missing constraints
-- 1. Add foreign key constraints ALTER TABLE orders ADD CONSTRAINT fk_orders_user_id FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE;
ALTER TABLE order_items ADD CONSTRAINT fk_order_items_order_id FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE;
-- 2. Add check constraints ALTER TABLE products ADD CONSTRAINT chk_products_price_positive CHECK (price >= 0);
ALTER TABLE products ADD CONSTRAINT chk_products_stock_non_negative CHECK (stock >= 0);
-- 3. Add unique constraints CREATE UNIQUE INDEX idx_users_email_unique ON users(LOWER(email));
-- 4. Add NOT NULL constraints ALTER TABLE users ALTER COLUMN email SET NOT NULL;
ALTER TABLE orders ALTER COLUMN total SET NOT NULL;
Automated Testing
// tests/data-integrity.test.ts describe("Data Integrity", () => { it("should not allow orphaned orders", async () => { // Try to create order with non-existent user await expect( prisma.order.create({ data: { userId: 99999, // Non-existent total: 100, status: "pending", }, }) ).rejects.toThrow("Foreign key constraint"); });
it("should not allow negative prices", async () => { await expect( prisma.product.create({ data: { name: "Test", price: -10, // Invalid stock: 100, }, }) ).rejects.toThrow("Check constraint"); });
it("should not allow duplicate emails", async () => { await prisma.user.create({ data: { email: "test@example.com", name: "Test" }, });
await expect(
prisma.user.create({
data: { email: "test@example.com", name: "Test 2" },
})
).rejects.toThrow("Unique constraint");
}); });
Monitoring Dashboard
// Monitor data quality metrics
async function getDataQualityMetrics() {
return {
orphanedOrders: await prisma.$queryRaw SELECT COUNT(*) FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL ,
duplicateEmails: await prisma.$queryRaw SELECT COUNT(*) FROM ( SELECT email FROM users GROUP BY email HAVING COUNT(*) > 1 ) t ,
invalidPrices: await prisma.$queryRaw SELECT COUNT(*) FROM products WHERE price < 0 ,
missingData: await prisma.$queryRaw SELECT SUM(CASE WHEN email IS NULL THEN 1 ELSE 0 END) as users_no_email, SUM(CASE WHEN total IS NULL THEN 1 ELSE 0 END) as orders_no_total FROM users CROSS JOIN orders ,
};
}
Best Practices
-
Add constraints: Prevent issues at database level
-
Regular audits: Weekly integrity checks
-
Automated fixes: Safe, reversible repairs
-
Monitor metrics: Track data quality over time
-
Test constraints: Ensure they work
-
Soft deletes: Easier recovery
-
Backup before fixes: Always
Output Checklist
-
Orphaned record detection
-
Foreign key integrity checks
-
Constraint violation detection
-
Missing field checks
-
Automated audit script
-
Fix scripts (with dry-run)
-
Prevention migrations (add constraints)
-
Automated tests
-
Monitoring dashboard
-
Regular audit schedule