DB Performance Watchlist
Monitor database performance and prevent regressions.
Key Performance Metrics
// performance-metrics.ts
//
// Shape of a single database-performance snapshot gathered by the
// monitoring scripts in this document.
export interface DBMetrics {
  /** Queries whose mean latency exceeded the configured threshold. */
  slowQueries: {
    threshold: number; // ms
    count: number;
    queries: SlowQuery[];
  };

  /** Connection-pool occupancy. */
  connections: {
    active: number;
    idle: number;
    total: number;
    maxConnections: number;
    utilizationPercent: number;
  };

  /** Host/instance resource usage. */
  resources: {
    cpuPercent: number;
    memoryPercent: number;
    diskUsagePercent: number;
    iops: number;
  };

  /** Statement throughput, by verb. */
  queryStats: {
    selectsPerSecond: number;
    insertsPerSecond: number;
    updatesPerSecond: number;
    deletesPerSecond: number;
  };

  /** Buffer-cache effectiveness. */
  cache: {
    hitRate: number; // %
    size: number; // MB
    evictions: number;
  };

  /** Index health: candidates for dropping and for creation. */
  indexes: {
    unusedIndexes: string[];
    missingIndexes: string[];
  };
}

/** One aggregated slow-query entry. */
interface SlowQuery {
  query: string;
  duration: number;
  calls: number;
  avgDuration: number;
  table: string;
}
Slow Query Detection
// scripts/detect-slow-queries.ts
async function detectSlowQueries(thresholdMs: number = 100) {
// Enable slow query logging (PostgreSQL)
await prisma.$executeRaw ALTER DATABASE mydb SET log_min_duration_statement = ${thresholdMs}; ;
// Query pg_stat_statements for slow queries
const slowQueries = await prisma.$queryRaw<SlowQuery[]> SELECT query, calls, total_exec_time / 1000 as total_time_ms, mean_exec_time / 1000 as avg_time_ms, max_exec_time / 1000 as max_time_ms, (total_exec_time / sum(total_exec_time) OVER()) * 100 as percent_of_total FROM pg_stat_statements WHERE mean_exec_time > ${thresholdMs} ORDER BY mean_exec_time DESC LIMIT 20 ;
console.log("🐌 Slow Queries Detected:\n");
slowQueries.forEach((q, i) => {
console.log(${i + 1}. ${q.query.substring(0, 80)}...);
console.log( Calls: ${q.calls});
console.log( Avg: ${q.avg_time_ms.toFixed(2)}ms);
console.log( Max: ${q.max_time_ms.toFixed(2)}ms);
console.log( % of total time: ${q.percent_of_total.toFixed(1)}%\n);
});
return slowQueries; }
Connection Pool Monitoring
async function monitorConnectionPool() {
const stats = await prisma.$queryRaw<any[]> SELECT sum(numbackends) as total_connections, sum(CASE WHEN state = 'active' THEN 1 ELSE 0 END) as active, sum(CASE WHEN state = 'idle' THEN 1 ELSE 0 END) as idle, max_connections FROM pg_stat_database CROSS JOIN (SELECT setting::int as max_connections FROM pg_settings WHERE name = 'max_connections') WHERE datname = current_database() GROUP BY max_connections ;
const { total_connections, active, idle, max_connections } = stats[0]; const utilization = (total_connections / max_connections) * 100;
console.log("🔌 Connection Pool Status:");
console.log(
Total: ${total_connections}/${max_connections} (${utilization.toFixed( 1 )}%)
);
console.log( Active: ${active});
console.log( Idle: ${idle});
// Alert if > 80% utilization
if (utilization > 80) {
console.warn("⚠️ Connection pool >80% utilized!");
await sendAlert({
title: "High connection pool usage",
message: ${utilization.toFixed(1)}% of connections in use,
});
}
}
Resource Monitoring
async function monitorResources() {
// CPU Usage
const cpuStats = await prisma.$queryRaw<any[]> SELECT (sum(total_exec_time) / (extract(epoch from (now() - stats_reset)) * 1000 * 100)) as cpu_percent FROM pg_stat_statements, pg_stat_database WHERE datname = current_database() ;
// Memory Usage
const memStats = await prisma.$queryRaw<any[]> SELECT pg_size_pretty(pg_database_size(current_database())) as db_size, pg_size_pretty(sum(pg_relation_size(schemaname||'.'||tablename))) as tables_size FROM pg_tables WHERE schemaname = 'public' ;
// Cache Hit Rate
const cacheStats = await prisma.$queryRaw<any[]> SELECT sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) * 100 as cache_hit_rate FROM pg_statio_user_tables ;
console.log("📊 Resource Usage:");
console.log( CPU: ${cpuStats[0].cpu_percent.toFixed(1)}%);
console.log( Database Size: ${memStats[0].db_size});
console.log( Cache Hit Rate: ${cacheStats[0].cache_hit_rate.toFixed(1)}%);
// Alert if cache hit rate < 90%
if (cacheStats[0].cache_hit_rate < 90) {
console.warn("⚠️ Cache hit rate below 90%!");
await sendAlert({
title: "Low cache hit rate",
message: Cache hit rate: ${cacheStats[0].cache_hit_rate.toFixed(1)}%,
});
}
}
Index Usage Analysis
async function analyzeIndexUsage() {
// Find unused indexes
const unusedIndexes = await prisma.$queryRaw<any[]> SELECT schemaname, tablename, indexname, idx_scan FROM pg_stat_user_indexes WHERE idx_scan = 0 AND indexname NOT LIKE '%_pkey' ORDER BY pg_relation_size(indexrelid) DESC ;
console.log("🗂️ Unused Indexes:\n");
unusedIndexes.forEach((idx) => {
console.log( ${idx.tablename}.${idx.indexname} (0 scans));
});
// Find missing indexes (sequential scans on large tables)
const missingIndexes = await prisma.$queryRaw<any[]> SELECT schemaname, tablename, seq_scan, seq_tup_read, idx_scan, n_live_tup FROM pg_stat_user_tables WHERE seq_scan > 1000 AND n_live_tup > 10000 ORDER BY seq_scan * n_live_tup DESC LIMIT 10 ;
console.log("\n📉 Tables with High Sequential Scans:\n");
missingIndexes.forEach((table) => {
console.log( ${table.tablename}:);
console.log( Sequential scans: ${table.seq_scan});
console.log( Rows: ${table.n_live_tup});
console.log( Index scans: ${table.idx_scan});
});
}
Alert Thresholds
const ALERT_THRESHOLDS = { slowQuery: { avgDuration: 500, // ms maxDuration: 2000, // ms callsPerMinute: 100, }, connections: { utilizationWarning: 70, // % utilizationCritical: 85, // % }, resources: { cpuWarning: 70, // % cpuCritical: 85, // % memoryWarning: 80, // % memoryCritical: 90, // % diskWarning: 75, // % diskCritical: 85, // % }, cache: { hitRateWarning: 90, // % hitRateCritical: 80, // % }, queryRate: { maxSelectsPerSecond: 10000, maxWritesPerSecond: 1000, }, };
async function checkThresholds() { const metrics = await gatherMetrics();
// Check slow queries
if (metrics.slowQueries.count > 10) {
await sendAlert({
level: "warning",
title: "Slow queries detected",
message: ${metrics.slowQueries.count} queries exceeding ${ALERT_THRESHOLDS.slowQuery.avgDuration}ms,
});
}
// Check connection pool
if (
metrics.connections.utilizationPercent >
ALERT_THRESHOLDS.connections.utilizationCritical
) {
await sendAlert({
level: "critical",
title: "Connection pool critical",
message: ${metrics.connections.utilizationPercent.toFixed( 1 )}% utilization,
});
}
// Check cache hit rate
if (metrics.cache.hitRate < ALERT_THRESHOLDS.cache.hitRateCritical) {
await sendAlert({
level: "critical",
title: "Cache hit rate critical",
message: ${metrics.cache.hitRate.toFixed(1)}% hit rate,
});
}
}
Monitoring Dashboard
// Generate monitoring report async function generatePerformanceReport() { console.log("📊 Database Performance Report\n"); console.log("=".repeat(50) + "\n");
// Slow queries
const slowQueries = await detectSlowQueries(100);
console.log(Slow Queries (>100ms): ${slowQueries.length}\n);
// Connection pool await monitorConnectionPool(); console.log();
// Resources await monitorResources(); console.log();
// Index usage await analyzeIndexUsage(); console.log();
// Query rates
const queryStats = await prisma.$queryRaw<any[]> SELECT sum(xact_commit + xact_rollback) as transactions, sum(tup_returned) as rows_read, sum(tup_inserted) as rows_inserted, sum(tup_updated) as rows_updated, sum(tup_deleted) as rows_deleted FROM pg_stat_database WHERE datname = current_database() ;
console.log("📈 Query Statistics:");
console.log( Transactions: ${queryStats[0].transactions});
console.log( Rows read: ${queryStats[0].rows_read});
console.log( Rows inserted: ${queryStats[0].rows_inserted});
console.log( Rows updated: ${queryStats[0].rows_updated});
console.log( Rows deleted: ${queryStats[0].rows_deleted});
}
Automated Monitoring Script
// scripts/monitor-db.ts import cron from "node-cron";
// Run every 5 minutes cron.schedule("*/5 * * * *", async () => { await checkThresholds(); });
// Generate report every hour cron.schedule("0 * * * *", async () => { await generatePerformanceReport(); });
// Analyze indexes weekly cron.schedule("0 0 * * 0", async () => { await analyzeIndexUsage(); });
Grafana Dashboard Queries
-- Query latency over time SELECT bucket, AVG(mean_exec_time) as avg_latency, MAX(max_exec_time) as max_latency, SUM(calls) as total_calls FROM pg_stat_statements WHERE query NOT LIKE '%pg_stat_statements%' GROUP BY time_bucket('5 minutes', queryid) ORDER BY bucket;
-- Connection count over time SELECT now() as time, count() as total, count() FILTER (WHERE state = 'active') as active, count(*) FILTER (WHERE state = 'idle') as idle FROM pg_stat_activity;
-- Cache hit rate SELECT sum(heap_blks_hit) / (sum(heap_blks_hit) + sum(heap_blks_read)) * 100 as cache_hit_rate FROM pg_statio_user_tables;
Best Practices
- **Monitor continuously**: Don't wait for problems to surface
- **Set appropriate thresholds**: Base them on your SLAs
- **Alert on trends**: Not just on absolute values
- **Review regularly**: Hold weekly performance reviews
- **Automate everything**: No manual checks
- **Document baselines**: Know what "normal" looks like
- **Test alerts**: Ensure they actually fire
Output Checklist
- [ ] Slow query detection configured
- [ ] Connection pool monitoring
- [ ] Resource usage tracking
- [ ] Cache hit rate monitoring
- [ ] Index usage analysis
- [ ] Alert thresholds defined
- [ ] Monitoring dashboard set up
- [ ] Automated checks scheduled
- [ ] Grafana/alerting integration
- [ ] Performance baseline documented