sql-patterns

SQL-Patterns

Advanced SQL patterns for data engineering beyond basic SELECT/JOIN.

Common Table Expressions (CTEs)

-- Chain transformations readably WITH active_users AS ( SELECT user_id, email FROM users WHERE status = 'active' ), user_orders AS ( SELECT u.user_id, COUNT(*) as order_count FROM active_users u JOIN orders o ON u.user_id = o.user_id GROUP BY u.user_id ) SELECT * FROM user_orders WHERE order_count > 5;

Window Functions

-- Row numbering within groups SELECT *, ROW_NUMBER() OVER (PARTITION BY category ORDER BY created_at DESC) as rn FROM products;

-- Running totals SELECT date, revenue, SUM(revenue) OVER (ORDER BY date) as cumulative_revenue FROM daily_sales;

-- Percent of total SELECT category, sales, sales * 100.0 / SUM(sales) OVER () as pct_of_total FROM category_sales;

-- Lead/Lag for time series SELECT date, value, LAG(value, 1) OVER (ORDER BY date) as prev_value, value - LAG(value, 1) OVER (ORDER BY date) as change FROM metrics;

-- Ranking with ties SELECT *, RANK() OVER (ORDER BY score DESC) as rank, -- 1,2,2,4 DENSE_RANK() OVER (ORDER BY score DESC) as drank -- 1,2,2,3 FROM scores;

Recursive CTEs

-- Hierarchical data (org chart, categories) WITH RECURSIVE org_tree AS ( -- Base case: top-level managers SELECT id, name, manager_id, 1 as depth FROM employees WHERE manager_id IS NULL

UNION ALL

-- Recursive case: subordinates SELECT e.id, e.name, e.manager_id, t.depth + 1 FROM employees e JOIN org_tree t ON e.manager_id = t.id ) SELECT * FROM org_tree;

-- Generate date series WITH RECURSIVE dates AS ( SELECT DATE '2024-01-01' as dt UNION ALL SELECT dt + INTERVAL '1 day' FROM dates WHERE dt < DATE '2024-12-31' ) SELECT * FROM dates;

CASE Expressions

-- Simple CASE SELECT CASE status WHEN 'A' THEN 'Active' WHEN 'I' THEN 'Inactive' ELSE 'Unknown' END as status_label FROM users;

-- Searched CASE for ranges SELECT CASE WHEN age < 18 THEN 'Minor' WHEN age < 65 THEN 'Adult' ELSE 'Senior' END as age_group FROM users;

-- Conditional aggregation SELECT COUNT() as total, COUNT() FILTER (WHERE status = 'active') as active_count, -- PostgreSQL SUM(CASE WHEN status = 'active' THEN 1 ELSE 0 END) as active_count -- ANSI FROM users;

UPSERT Patterns

-- PostgreSQL: INSERT ON CONFLICT INSERT INTO inventory (sku, quantity, updated_at) VALUES ('ABC123', 100, NOW()) ON CONFLICT (sku) DO UPDATE SET quantity = EXCLUDED.quantity, updated_at = EXCLUDED.updated_at;

-- MySQL: INSERT ON DUPLICATE KEY INSERT INTO inventory (sku, quantity, updated_at) VALUES ('ABC123', 100, NOW()) ON DUPLICATE KEY UPDATE quantity = VALUES(quantity), updated_at = VALUES(updated_at);

-- SQLite: INSERT OR REPLACE INSERT OR REPLACE INTO inventory (sku, quantity, updated_at) VALUES ('ABC123', 100, datetime('now'));

Efficient Pagination

-- BAD: OFFSET for large pages SELECT * FROM orders ORDER BY id LIMIT 20 OFFSET 10000;

-- GOOD: Keyset pagination SELECT * FROM orders WHERE id > 10000 -- last seen id ORDER BY id LIMIT 20;

Batch Operations

-- Batch DELETE with limit (avoid long locks) DELETE FROM logs WHERE created_at < NOW() - INTERVAL '90 days' LIMIT 10000;

-- Batch UPDATE UPDATE orders SET status = 'archived' WHERE id IN ( SELECT id FROM orders WHERE status = 'completed' AND completed_at < NOW() - INTERVAL '1 year' LIMIT 1000 );

Index-Friendly Queries

-- BAD: Function on indexed column SELECT * FROM users WHERE LOWER(email) = 'test@example.com';

-- GOOD: Store lowercase or use expression index SELECT * FROM users WHERE email_lower = 'test@example.com'; -- Or: CREATE INDEX idx_email_lower ON users (LOWER(email));

-- BAD: Leading wildcard SELECT * FROM products WHERE name LIKE '%widget%';

-- GOOD: Full-text search or prefix match SELECT * FROM products WHERE name LIKE 'widget%';

NULL Handling

-- COALESCE for defaults SELECT COALESCE(nickname, first_name, 'Anonymous') as display_name FROM users;

-- NULLIF to convert values to NULL SELECT NULLIF(status, '') as status -- empty string -> NULL FROM records;

-- IS DISTINCT FROM (NULL-safe comparison) SELECT * FROM a WHERE a.value IS DISTINCT FROM b.value; -- treats NULL != NULL as false

LATERAL Joins

-- Top N per group SELECT d.name, t.product, t.revenue FROM departments d CROSS JOIN LATERAL ( SELECT product, revenue FROM sales WHERE sales.dept_id = d.id ORDER BY revenue DESC LIMIT 3 ) t;

Materialized Views

-- Create for expensive aggregations CREATE MATERIALIZED VIEW daily_stats AS SELECT DATE_TRUNC('day', created_at) as date, COUNT(*) as total_orders, SUM(amount) as revenue FROM orders GROUP BY 1;

-- Refresh periodically REFRESH MATERIALIZED VIEW CONCURRENTLY daily_stats;

Query Optimization Checklist

Check EXPLAIN ANALYZE - Look for sequential scans on large tables
Add missing indexes - Columns in WHERE, JOIN, ORDER BY
**Avoid SELECT *** - Fetch only needed columns
Use EXISTS over IN - For correlated subqueries
Batch large operations - Avoid long-running transactions
Partition large tables - By date or category
Use connection pooling - Avoid connection overhead

Safety Notice

Copy this and send it to your AI assistant to learn

Source Transparency

Related Skills

google-ads-strategy

viral-content

market-research