Flaky Test Detective

Diagnose and eliminate flaky tests systematically.

Common Flaky Test Patterns

Timing Issues

// ❌ Flaky: Race condition
test("should load user data", async () => {
  render(<UserProfile userId="123" />);

  // Race condition - might pass or fail: the assertion runs immediately
  // after render, without waiting for the name to appear.
  expect(screen.getByText("John Doe")).toBeInTheDocument();
});

// ✅ Fixed: Wait for element
test("should load user data", async () => {
  render(<UserProfile userId="123" />);

  // waitFor re-runs the assertion until it passes or its timeout elapses.
  await waitFor(() => {
    expect(screen.getByText("John Doe")).toBeInTheDocument();
  });
});

// ❌ Flaky: Fixed timeout
test("should complete animation", async () => {
  render(<AnimatedComponent />);

  // Brittle! A hard-coded 500 ms sleep is only a guess at how long to wait.
  await new Promise((done) => setTimeout(done, 500));

  expect(element).toHaveClass("animated");
});

// ✅ Fixed: Wait for condition
test("should complete animation", async () => {
  render(<AnimatedComponent />);

  // NOTE(review): `element` is not declared in this snippet; it is assumed
  // to be looked up elsewhere - confirm before copying.
  const assertAnimated = () => {
    expect(element).toHaveClass("animated");
  };

  // Poll the assertion for up to 2 s instead of sleeping a fixed amount.
  await waitFor(assertAnimated, { timeout: 2000 });
});

Shared State

// ❌ Flaky: Global state pollution
// `userId` is module-level, so every test in the file shares one binding.
let userId = "123";

test("test A", () => {
  userId = "456"; // Modifies global
  // ...
});

test("test B", () => {
  // Fails if test A runs first!
  expect(userId).toBe("123");
});

// ✅ Fixed: Isolated state
// Each test declares its own `userId`, so neither can affect the other.
test("test A", () => {
  const userId = "456"; // Local variable
  // ...
});

test("test B", () => {
  const userId = "123";
  expect(userId).toBe("123");
});

// ❌ Flaky: Database not cleaned
test("should create user", async () => {
  await db.user.create({ email: "test@example.com" });
  // No cleanup! The row is still there when the next test runs.
});

test("should create another user", async () => {
  // Fails! Duplicate - the same email was already inserted above.
  await db.user.create({ email: "test@example.com" });
});

// ✅ Fixed: Proper cleanup
// Delete all user rows after every test so no test sees another's data.
afterEach(async () => {
  await db.user.deleteMany();
});

Randomness

// ❌ Flaky: Random data
test("should sort users", () => {
  // Different each time!
  const users = generateRandomUsers(10);
  const sorted = sortUsers(users);

  // Might not be Alice - the input is not under the test's control.
  expect(sorted[0].name).toBe("Alice");
});

// ✅ Fixed: Deterministic data
test("should sort users", () => {
  // Fixed, deliberately unsorted input: the expected first element is known.
  const users = [
    { name: "Charlie", age: 30 },
    { name: "Alice", age: 25 },
    { name: "Bob", age: 35 },
  ];

  const sorted = sortUsers(users);

  expect(sorted[0].name).toBe("Alice");
});

// ✅ Fixed: Seeded randomness
import { faker } from "@faker-js/faker";

// Re-seed before every test so each one starts from the same sequence.
beforeEach(() => {
  faker.seed(12345); // Same data every time
});

Network Dependencies

// ❌ Flaky: Real API call
test("should fetch users", async () => {
  // External API!
  const users = await fetchUsers();

  // Might fail if API down
  expect(users).toHaveLength(10);
});

// ✅ Fixed: Mocked API
test("should fetch users", async () => {
  // The response is defined by the test itself, so its length is known.
  const mockedUsers = [
    { id: "1", name: "User 1" },
    { id: "2", name: "User 2" },
  ];
  server.use(http.get("/api/users", () => HttpResponse.json(mockedUsers)));

  const users = await fetchUsers();

  expect(users).toHaveLength(2);
});

Flaky Test Detection Script

// scripts/detect-flaky-tests.ts
import { execSync } from "child_process";

/** Pass/fail tally for a single test across all runs. */
interface TestTally {
  passed: number;
  failed: number;
}

/** The subset of the JSON reporter output that this script reads. */
interface JsonTestReport {
  testResults: {
    name: string;
    assertionResults: { fullName: string; status: string }[];
  }[];
}

/**
 * Recovers reporter output from an error thrown by a failed suite run.
 *
 * `execSync` throws when the command exits non-zero and attaches the captured
 * `stdout` to the error, so a run that contains failing tests still carries
 * its report there.
 *
 * @returns The captured stdout, or `undefined` when the error has none.
 */
function outputFromFailedRun(error: unknown): string | undefined {
  if (typeof error !== "object" || error === null || !("stdout" in error)) {
    return undefined;
  }
  const { stdout } = error;
  return typeof stdout === "string" && stdout.trim() !== "" ? stdout : undefined;
}

/**
 * Parses one run's reporter output and adds its results to `tallies`.
 *
 * @throws If no JSON object can be located in, or parsed from, `rawOutput`.
 */
function recordRun(rawOutput: string, tallies: Map<string, TestTally>): void {
  // Tolerate non-JSON text around the report (e.g. a script banner printed
  // before it) by parsing only from the first "{" to the last "}".
  const jsonStart = rawOutput.indexOf("{");
  const jsonEnd = rawOutput.lastIndexOf("}");
  if (jsonStart === -1 || jsonEnd < jsonStart) {
    throw new Error("No JSON report found in test output");
  }

  // Shape is trusted rather than validated; a malformed report surfaces as a
  // thrown error that the caller logs.
  const report = JSON.parse(rawOutput.slice(jsonStart, jsonEnd + 1)) as JsonTestReport;

  for (const file of report.testResults) {
    for (const test of file.assertionResults) {
      const key = `${file.name}::${test.fullName}`;
      const stats = tallies.get(key) ?? { passed: 0, failed: 0 };

      // Any status other than "passed" counts as a failure.
      if (test.status === "passed") {
        stats.passed++;
      } else {
        stats.failed++;
      }

      tallies.set(key, stats);
    }
  }
}

/**
 * Runs the test suite repeatedly and reports tests that both passed and
 * failed across the runs.
 *
 * Prints a report to the console and calls `process.exit(1)` when at least
 * one flaky test is found.
 *
 * @param iterations Number of times to run the suite.
 * @param runSuite Runs the suite once and returns the JSON reporter output.
 *   It may throw an error carrying a `stdout` string, as `execSync` does on a
 *   non-zero exit; that output is still analysed. Defaults to
 *   `npm test -- --reporter=json`.
 */
async function detectFlakyTests(
  iterations: number = 10,
  runSuite: () => string = () =>
    execSync("npm test -- --reporter=json", { encoding: "utf-8" })
): Promise<void> {
  const results = new Map<string, TestTally>();

  for (let i = 0; i < iterations; i++) {
    console.log(`\nRun ${i + 1}/${iterations}`);

    let output: string;
    try {
      output = runSuite();
    } catch (error: unknown) {
      // A failing test makes the command exit non-zero. Those are exactly the
      // runs this script needs, so use the report attached to the error.
      const recovered = outputFromFailedRun(error);
      if (recovered === undefined) {
        console.error("Test run failed:", error);
        continue;
      }
      output = recovered;
    }

    try {
      recordRun(output, results);
    } catch (error: unknown) {
      console.error("Test run failed:", error);
    }
  }

  // Analyze results
  console.log("\n🔍 Flaky Test Report\n");

  const flakyTests: string[] = [];

  for (const [testName, stats] of results) {
    // Flaky means inconsistent: at least one pass and at least one failure.
    if (stats.failed > 0 && stats.passed > 0) {
      const failureRate = (stats.failed / iterations) * 100;
      console.log(`❌ FLAKY: ${testName}`);
      console.log(`   Passed: ${stats.passed}/${iterations}`);
      console.log(`   Failed: ${stats.failed}/${iterations}`);
      console.log(`   Failure rate: ${failureRate.toFixed(1)}%\n`);
      flakyTests.push(testName);
    }
  }

  if (flakyTests.length === 0) {
    console.log("✅ No flaky tests detected!");
  } else {
    console.log(`\n🚨 Found ${flakyTests.length} flaky tests`);
    process.exit(1);
  }
}

// Script entry point: start detection as soon as the file is executed.
detectFlakyTests(20); // Run tests 20 times

Root Cause Analysis

// Framework for analyzing flaky tests

/** Result of classifying one flaky test from its error messages. */
interface FlakyTestAnalysis {
  testName: string;
  /** Not computed by `analyzeTest`, which always leaves it at 0. */
  failureRate: number;
  /** One entry per heuristic that matched, in the order they are checked. */
  symptoms: string[];
  rootCause: "timing" | "state" | "randomness" | "network" | "unknown";
  recommendation: string;
}

/**
 * Classifies a flaky test by scanning its error messages for known phrases.
 *
 * Matching is case-insensitive, so messages such as "Network Error" or
 * "Unique constraint failed" are recognised. Checks run in the order timing,
 * state, randomness, network. Every match appends a symptom, while
 * `rootCause` and `recommendation` hold the LAST check that matched.
 *
 * @param testName Name copied into the result.
 * @param errorMessages Error messages collected from the test's failures.
 * @returns The analysis; `rootCause` is "unknown" when nothing matched.
 */
function analyzeTest(
  testName: string,
  errorMessages: string[]
): FlakyTestAnalysis {
  const analysis: FlakyTestAnalysis = {
    testName,
    failureRate: 0,
    symptoms: [],
    rootCause: "unknown",
    recommendation: "",
  };

  // Normalise once so every heuristic below is case-insensitive.
  const messages = errorMessages.map((msg) => msg.toLowerCase());
  const anyMentions = (...phrases: string[]): boolean =>
    messages.some((msg) => phrases.some((phrase) => msg.includes(phrase)));

  // Detect timing issues
  if (anyMentions("timeout", "not found")) {
    analysis.symptoms.push("Timeout or element not found");
    analysis.rootCause = "timing";
    analysis.recommendation =
      "Add explicit waits using waitFor() or findBy* queries";
  }

  // Detect shared state
  if (anyMentions("already exists", "unique constraint")) {
    analysis.symptoms.push("Duplicate or existing data");
    analysis.rootCause = "state";
    analysis.recommendation =
      "Add beforeEach/afterEach cleanup or use unique test data";
  }

  // Detect randomness: both words must appear in the same message.
  if (
    messages.some((msg) => msg.includes("expected") && msg.includes("received"))
  ) {
    analysis.symptoms.push("Inconsistent values");
    analysis.rootCause = "randomness";
    analysis.recommendation =
      "Use deterministic test data or seed random generators";
  }

  // Detect network issues
  if (anyMentions("network", "econnrefused")) {
    analysis.symptoms.push("Network or connection errors");
    analysis.rootCause = "network";
    analysis.recommendation = "Mock all network requests using MSW or similar";
  }

  return analysis;
}

Stabilization Guidelines

// Test stability checklist
/** Stabilization reminders grouped by the category of flakiness they address. */
const stabilityChecklist = {
  timing: [
    "Use waitFor() instead of fixed timeouts",
    "Use findBy* queries (built-in waiting)",
    "Set appropriate timeout values",
    "Wait for loading states to disappear",
  ],
  state: [
    "Clear database before each test",
    "Reset mocks after each test",
    "Use test-specific data (unique IDs)",
    "Avoid global variables",
  ],
  randomness: [
    "Use fixed seed for random generators",
    "Use deterministic test data",
    "Avoid Date.now() - mock time instead",
    "Generate IDs deterministically",
  ],
  network: [
    "Mock all API calls",
    "Use MSW for HTTP mocking",
    "Avoid real external services",
    "Test network errors explicitly",
  ],
  parallelism: [
    "Use isolated databases per test worker",
    "Avoid port conflicts (random ports)",
    "Don't share file system state",
    "Use test.concurrent cautiously",
  ],
};

Auto-Fix Patterns

// Automated fixes for common issues

// Fix 1: Add waitFor to assertions
/**
 * Wraps `expect(screen.getBy*(...)).toBeInTheDocument()` assertions in
 * `await waitFor(() => ...)`.
 *
 * Replace: expect(screen.getByText('...')).toBeInTheDocument()
 * With:    await waitFor(() => expect(screen.getByText('...')).toBeInTheDocument())
 *
 * The whole assertion is matched and wrapped as one unit, so the added
 * parentheses always balance. Assertions using another matcher or another
 * `expect` target are left untouched, and an assertion that is already
 * wrapped is not wrapped again.
 *
 * Limitation: query arguments may contain at most one level of nested
 * parentheses.
 *
 * @param code Test source text to rewrite.
 * @returns The rewritten source text.
 */
function addWaitFor(code: string): string {
  // (?<!...)          skip assertions already preceded by "waitFor(() => "
  // (?:[^()]|\(..\))* query arguments, allowing one nested (...) group
  const presenceAssertion =
    /(?<!waitFor\(\(\) => )expect\(screen\.getBy\w*\((?:[^()]|\([^()]*\))*\)\)\.toBeInTheDocument\(\)/g;

  // A replacer function keeps any "$" in the matched text literal.
  return code.replace(
    presenceAssertion,
    (assertion) => `await waitFor(() => ${assertion})`
  );
}

// Fix 2: Replace getBy with findBy
/**
 * Rewrites every `screen.getBy*` query to `await screen.findBy*`.
 *
 * The dot is matched literally, and an `await` already in front of the query
 * is absorbed so the result never contains `await await`.
 *
 * @param code Test source text to rewrite.
 * @returns The rewritten source text.
 */
function replaceGetByWithFindBy(code: string): string {
  return code.replace(/(?:\bawait\s+)?\bscreen\.getBy/g, "await screen.findBy");
}

// Fix 3: Add cleanup
/**
 * Inserts an `afterEach` hook that awaits `cleanup()` immediately before the
 * first `test(` call.
 *
 * The code is returned unchanged when it already mentions `afterEach`, or
 * when it contains no `test(` call, because there is then no insertion point.
 *
 * @param code Test source text to rewrite.
 * @returns The source text, with the hook inserted when applicable.
 */
function addCleanup(code: string): string {
  if (code.includes("afterEach")) {
    return code;
  }

  // \b keeps identifiers that merely end in "test" (e.g. latest() ) from
  // being treated as the first test.
  const insertPoint = code.search(/\btest\(/);
  if (insertPoint === -1) {
    // Slicing at -1 would splice the hook in before the last character.
    return code;
  }

  return (
    code.slice(0, insertPoint) +
    "afterEach(async () => {\n await cleanup();\n});\n\n" +
    code.slice(insertPoint)
  );
}

Monitoring Flaky Tests in CI

# .github/workflows/test-stability.yml
# Nightly job that runs the test suite repeatedly to surface flaky tests.

name: Test Stability

on:
  schedule:
    - cron: "0 2 * * *" # Run nightly

jobs:
  stability-check:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - uses: actions/setup-node@v4
        with:
          node-version: "20"

      - run: npm ci

      # "|| echo" keeps the loop going after a failing run, so all 20 runs
      # execute and this step itself never fails.
      # NOTE(review): nothing from these runs is saved for the next step, and
      # the detection script runs the suite itself - confirm this loop is
      # still wanted.
      - name: Run tests 20 times
        run: |
          for i in {1..20}; do
            echo "Run $i/20"
            npm test || echo "FAILED: Run $i"
          done

      # Assumes package.json defines a "detect-flaky-tests" script - TODO confirm.
      - name: Analyze results
        run: npm run detect-flaky-tests

Best Practices

Explicit waits: Wait for a condition (waitFor / findBy*); never rely on fixed sleeps or arbitrary delays
Clean state: Reset between tests
Deterministic data: No unseeded randomness (use fixed data or a fixed seed)
Mock external deps: APIs, time, randomness
Run tests multiple times: Catch intermittent failures
Isolate tests: No shared state
Monitor CI: Track flaky test trends

Output Checklist

Common patterns identified
Root cause analysis performed
Timing issues fixed (waitFor)
Shared state eliminated (cleanup)
Randomness removed (fixed seeds)
Network mocked (MSW)
Detection script implemented
Stabilization guidelines documented
CI monitoring configured

flaky-test-detective

Safety Notice

Copy this and send it to your AI assistant to learn

.github/workflows/test-stability.yml

Source Transparency

Related Skills

websocket-realtime-builder

webhook-receiver-hardener

api-docs-generator