data-integrity-auditor

Data Integrity Auditor

Safety Notice

This listing is imported from skills.sh public index metadata. Review upstream SKILL.md and repository scripts before running.

Copy this and send it to your AI assistant to learn

Install skill "data-integrity-auditor" with this command: npx skills add monkey1sai/openai-cli/monkey1sai-openai-cli-data-integrity-auditor

Data Integrity Auditor

Detect and fix data integrity issues automatically.

Integrity Check Types

  1. Orphaned Records

-- Find orphaned orders (no matching user) SELECT o.id, o.user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL;

-- Find orphaned order items (no matching order) SELECT oi.id, oi.order_id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL;

  1. Broken Foreign Keys

// scripts/check-foreign-keys.ts async function checkForeignKeys() { const issues: string[] = [];

// Orders → Users const orphanedOrders = await prisma.$queryRaw<any[]> SELECT o.id, o.user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL LIMIT 100 ;

if (orphanedOrders.length > 0) { issues.push( ❌ Found ${orphanedOrders.length} orders with invalid user_id ); console.log( " Sample IDs:", orphanedOrders.slice(0, 5).map((o) => o.id) ); }

// Order Items → Orders const orphanedItems = await prisma.$queryRaw<any[]> SELECT oi.id, oi.order_id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL LIMIT 100 ;

if (orphanedItems.length > 0) { issues.push( ❌ Found ${orphanedItems.length} order items with invalid order_id ); }

// Products → Categories const orphanedProducts = await prisma.$queryRaw<any[]> SELECT p.id, p.category_id FROM products p LEFT JOIN categories c ON c.id = p.category_id WHERE p.category_id IS NOT NULL AND c.id IS NULL LIMIT 100 ;

if (orphanedProducts.length > 0) { issues.push( ❌ Found ${orphanedProducts.length} products with invalid category_id ); }

return issues; }

  1. Constraint Violations

async function checkConstraints() { const issues: string[] = [];

// Check email uniqueness (should be caught by DB, but verify) const duplicateEmails = await prisma.$queryRaw<any[]> SELECT email, COUNT(*) as count FROM users GROUP BY email HAVING COUNT(*) > 1 ;

if (duplicateEmails.length > 0) { issues.push(❌ Found ${duplicateEmails.length} duplicate emails); }

// Check negative quantities const negativeStock = await prisma.$queryRaw<any[]> SELECT id, name, stock FROM products WHERE stock &#x3C; 0 ;

if (negativeStock.length > 0) { issues.push( ❌ Found ${negativeStock.length} products with negative stock ); }

// Check negative prices const negativePrices = await prisma.$queryRaw<any[]> SELECT id, name, price FROM products WHERE price &#x3C; 0 ;

if (negativePrices.length > 0) { issues.push( ❌ Found ${negativePrices.length} products with negative prices ); }

// Check invalid order status const invalidStatus = await prisma.$queryRaw<any[]> SELECT id, status FROM orders WHERE status NOT IN ('pending', 'paid', 'shipped', 'delivered', 'cancelled') ;

if (invalidStatus.length > 0) { issues.push(❌ Found ${invalidStatus.length} orders with invalid status); }

return issues; }

  1. Missing Required Fields

async function checkMissingFields() { const issues: string[] = [];

// Users missing required fields const usersNoEmail = await prisma.user.count({ where: { email: null }, });

if (usersNoEmail > 0) { issues.push(❌ Found ${usersNoEmail} users without email); }

// Orders with NULL totals const ordersNoTotal = await prisma.order.count({ where: { total: null }, });

if (ordersNoTotal > 0) { issues.push(❌ Found ${ordersNoTotal} orders without total); }

return issues; }

Comprehensive Audit Script

// scripts/audit-data-integrity.ts interface IntegrityIssue { severity: "critical" | "warning" | "info"; category: string; message: string; count: number; query?: string; fix?: string; }

async function auditDataIntegrity(): Promise<IntegrityIssue[]> { const issues: IntegrityIssue[] = [];

console.log("🔍 Auditing data integrity...\n");

// 1. Check orphaned records const orphanedOrders = await prisma.$queryRaw<any[]> SELECT COUNT(*) as count FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL ;

if (orphanedOrders[0].count > 0) { issues.push({ severity: "critical", category: "orphaned-records", message: "Orders with invalid user references", count: orphanedOrders[0].count, query: "SELECT id, user_id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL", fix: "DELETE FROM orders WHERE id IN (...)", }); }

// 2. Check duplicate unique constraints const duplicateEmails = await prisma.$queryRaw<any[]> SELECT email, COUNT(*) as count FROM users GROUP BY email HAVING COUNT(*) > 1 ;

if (duplicateEmails.length > 0) { issues.push({ severity: "critical", category: "constraint-violation", message: "Duplicate email addresses", count: duplicateEmails.length, fix: "Keep newest record, delete duplicates", }); }

// 3. Check data inconsistencies const invalidPrices = await prisma.$queryRaw<any[]> SELECT COUNT(*) as count FROM products WHERE price &#x3C; 0 ;

if (invalidPrices[0].count > 0) { issues.push({ severity: "warning", category: "data-quality", message: "Products with negative prices", count: invalidPrices[0].count, fix: "UPDATE products SET price = ABS(price) WHERE price < 0", }); }

// 4. Check referential integrity const brokenOrderItems = await prisma.$queryRaw<any[]> SELECT COUNT(*) as count FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL ;

if (brokenOrderItems[0].count > 0) { issues.push({ severity: "critical", category: "referential-integrity", message: "Order items referencing non-existent orders", count: brokenOrderItems[0].count, fix: "DELETE FROM order_items WHERE order_id NOT IN (SELECT id FROM orders)", }); }

return issues; }

async function generateReport() { const issues = await auditDataIntegrity();

console.log("\n📊 Data Integrity Report\n"); console.log(Total issues: ${issues.length}\n);

const grouped = issues.reduce((acc, issue) => { if (!acc[issue.severity]) acc[issue.severity] = []; acc[issue.severity].push(issue); return acc; }, {} as Record<string, IntegrityIssue[]>);

(["critical", "warning", "info"] as const).forEach((severity) => { const items = grouped[severity] || []; if (items.length === 0) return;

console.log(`\n${severity.toUpperCase()} (${items.length})\n`);

items.forEach((issue, i) => {
  console.log(`${i + 1}. [${issue.category}] ${issue.message}`);
  console.log(`   Count: ${issue.count}`);
  if (issue.query) {
    console.log(`   Query: ${issue.query.substring(0, 80)}...`);
  }
  if (issue.fix) {
    console.log(`   Fix: ${issue.fix}`);
  }
  console.log();
});

});

// Exit with error if critical issues process.exit(grouped.critical?.length > 0 ? 1 : 0); }

generateReport();

Automated Fixes

// scripts/fix-integrity-issues.ts async function fixOrphanedRecords() { console.log("🔧 Fixing orphaned records...\n");

// Delete orphaned orders const deletedOrders = await prisma.$executeRaw DELETE FROM orders WHERE id IN ( SELECT o.id FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL ) ; console.log(✅ Deleted ${deletedOrders} orphaned orders);

// Delete orphaned order items const deletedItems = await prisma.$executeRaw DELETE FROM order_items WHERE id IN ( SELECT oi.id FROM order_items oi LEFT JOIN orders o ON o.id = oi.order_id WHERE o.id IS NULL ) ; console.log(✅ Deleted ${deletedItems} orphaned order items); }

async function fixDuplicates() { console.log("🔧 Fixing duplicate records...\n");

// Keep newest user, delete old duplicates await prisma.$executeRaw DELETE FROM users WHERE id IN ( SELECT id FROM ( SELECT id, ROW_NUMBER() OVER (PARTITION BY email ORDER BY created_at DESC) as rn FROM users ) t WHERE rn > 1 ) ; console.log(✅ Fixed duplicate emails); }

async function fixConstraintViolations() { console.log("🔧 Fixing constraint violations...\n");

// Fix negative prices const fixedPrices = await prisma.$executeRaw UPDATE products SET price = ABS(price) WHERE price &#x3C; 0 ; console.log(✅ Fixed ${fixedPrices} negative prices);

// Fix negative stock const fixedStock = await prisma.$executeRaw UPDATE products SET stock = 0 WHERE stock &#x3C; 0 ; console.log(✅ Fixed ${fixedStock} negative stock values); }

Prevention: Add Missing Constraints

-- Migration to add missing constraints

-- 1. Add foreign key constraints ALTER TABLE orders ADD CONSTRAINT fk_orders_user_id FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE;

ALTER TABLE order_items ADD CONSTRAINT fk_order_items_order_id FOREIGN KEY (order_id) REFERENCES orders(id) ON DELETE CASCADE;

-- 2. Add check constraints ALTER TABLE products ADD CONSTRAINT chk_products_price_positive CHECK (price >= 0);

ALTER TABLE products ADD CONSTRAINT chk_products_stock_non_negative CHECK (stock >= 0);

-- 3. Add unique constraints CREATE UNIQUE INDEX idx_users_email_unique ON users(LOWER(email));

-- 4. Add NOT NULL constraints ALTER TABLE users ALTER COLUMN email SET NOT NULL;

ALTER TABLE orders ALTER COLUMN total SET NOT NULL;

Automated Testing

// tests/data-integrity.test.ts describe("Data Integrity", () => { it("should not allow orphaned orders", async () => { // Try to create order with non-existent user await expect( prisma.order.create({ data: { userId: 99999, // Non-existent total: 100, status: "pending", }, }) ).rejects.toThrow("Foreign key constraint"); });

it("should not allow negative prices", async () => { await expect( prisma.product.create({ data: { name: "Test", price: -10, // Invalid stock: 100, }, }) ).rejects.toThrow("Check constraint"); });

it("should not allow duplicate emails", async () => { await prisma.user.create({ data: { email: "test@example.com", name: "Test" }, });

await expect(
  prisma.user.create({
    data: { email: "test@example.com", name: "Test 2" },
  })
).rejects.toThrow("Unique constraint");

}); });

Monitoring Dashboard

// Monitor data quality metrics async function getDataQualityMetrics() { return { orphanedOrders: await prisma.$queryRaw SELECT COUNT(*) FROM orders o LEFT JOIN users u ON u.id = o.user_id WHERE u.id IS NULL , duplicateEmails: await prisma.$queryRaw SELECT COUNT(*) FROM ( SELECT email FROM users GROUP BY email HAVING COUNT(*) > 1 ) t , invalidPrices: await prisma.$queryRaw SELECT COUNT(*) FROM products WHERE price &#x3C; 0 , missingData: await prisma.$queryRaw SELECT SUM(CASE WHEN email IS NULL THEN 1 ELSE 0 END) as users_no_email, SUM(CASE WHEN total IS NULL THEN 1 ELSE 0 END) as orders_no_total FROM users CROSS JOIN orders , }; }

Best Practices

  • Add constraints: Prevent issues at database level

  • Regular audits: Weekly integrity checks

  • Automated fixes: Safe, reversible repairs

  • Monitor metrics: Track data quality over time

  • Test constraints: Ensure they work

  • Soft deletes: Easier recovery

  • Backup before fixes: Always

Output Checklist

  • Orphaned record detection

  • Foreign key integrity checks

  • Constraint violation detection

  • Missing field checks

  • Automated audit script

  • Fix scripts (with dry-run)

  • Prevention migrations (add constraints)

  • Automated tests

  • Monitoring dashboard

  • Regular audit schedule

Source Transparency

This detail page is rendered from real SKILL.md content. Trust labels are metadata-based hints, not a safety guarantee.

Related Skills

Related by shared tags or category signals.

Security

security-pr-checklist-skill

No summary provided by upstream source.

Repository SourceNeeds Review
Security

auth-security-reviewer

No summary provided by upstream source.

Repository SourceNeeds Review
Security

api-security-hardener

No summary provided by upstream source.

Repository SourceNeeds Review
Security

accessibility-auditor

No summary provided by upstream source.

Repository SourceNeeds Review