Secrets Detection Rules Expert

Expertise in writing secret-detection rules: pattern matching, regex optimization, false-positive reduction, and comprehensive coverage for detecting sensitive credentials in source code.

Core Principles

# Guiding principles for the scanner. Descriptive metadata only.
detection_philosophy:
  # Prefer fewer, trustworthy alerts over exhaustive but noisy coverage.
  precision_over_recall:
    principle: "Minimize false positives"
    reason: "Too many alerts = alert fatigue = ignored alerts"

  # Confidence tiers, listed from strongest to weakest signal.
  layered_detection:
    levels:
      - "High confidence: Known patterns"
      - "Medium confidence: Entropy + context"
      - "Low confidence: Heuristics"

  entropy_analysis:
    purpose: "Detect random strings that might be secrets"
    threshold: "Shannon entropy > 4.2"
    context: "Combined with naming patterns"

  # Signals consulted alongside the raw pattern match.
  contextual_validation:
    factors:
      - "Variable/key name"
      - "File location"
      - "Surrounding code"
      - "String format"

Rule Categories

AWS Credentials

# AWS credential rules. Regexes are single-quoted so backslashes and
# brackets reach the regex engine unmodified.
aws_rules:
  access_key_id:
    pattern: 'AKIA[0-9A-Z]{16}'
    confidence: "high"
    description: "AWS Access Key ID"
    example: "AKIAIOSFODNN7EXAMPLE"

  # The 40-character pattern alone is very generic, so a match only counts
  # when one of the context_required markers is also present.
  secret_access_key:
    pattern: '[A-Za-z0-9/+=]{40}'
    context_required:
      - "aws_secret"
      - "secret_access_key"
      - "AWS_SECRET"
    confidence: "high"
    description: "AWS Secret Access Key"

  session_token:
    pattern: 'FwoGZXIvYXdzE[A-Za-z0-9/+=]+'
    confidence: "high"
    description: "AWS Session Token"

API Keys & Tokens

# Generic and vendor-specific API key / token rules.
# All regexes are single-quoted YAML scalars: backslashes are literal and a
# literal single quote inside the value is written as ''.
api_key_rules:
  generic_api_key:
    patterns:
      # Capture group 2 holds the candidate key value.
      - name: "api_key variable"
        regex: '(?i)(api[_-]?key|apikey)\s*[:=]\s*["'']?([a-zA-Z0-9_-]{20,})["'']?'
        confidence: "medium"

      - name: "bearer token"
        regex: '(?i)bearer\s+[a-zA-Z0-9_-]{20,}'
        confidence: "high"

      - name: "authorization header"
        regex: '(?i)authorization\s*[:=]\s*["'']?[a-zA-Z0-9_-]{20,}["'']?'
        confidence: "medium"

  service_specific:
    github:
      patterns:
        - 'ghp_[a-zA-Z0-9]{36}'  # Personal access token
        - 'gho_[a-zA-Z0-9]{36}'  # OAuth access token
        - 'ghu_[a-zA-Z0-9]{36}'  # User-to-server token
        - 'ghs_[a-zA-Z0-9]{36}'  # Server-to-server token
      confidence: "high"

    slack:
      patterns:
        - 'xoxb-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}'  # Bot token
        - 'xoxp-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}'  # User token
        - 'xoxa-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}'  # App token
        - 'xoxr-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}'  # Refresh token
      confidence: "high"

    stripe:
      patterns:
        - 'sk_live_[a-zA-Z0-9]{24,}'  # Live secret key
        - 'sk_test_[a-zA-Z0-9]{24,}'  # Test secret key
        - 'rk_live_[a-zA-Z0-9]{24,}'  # Restricted key
        - 'pk_live_[a-zA-Z0-9]{24,}'  # Publishable key (lower risk)
      confidence: "high"

    google:
      patterns:
        - 'AIza[0-9A-Za-z_-]{35}'  # API key
        - '[0-9]+-[a-z0-9_]{32}\.apps\.googleusercontent\.com'  # OAuth client
      confidence: "high"

    twilio:
      patterns:
        - 'SK[a-f0-9]{32}'  # API key
        - 'AC[a-f0-9]{32}'  # Account SID
      confidence: "high"

    sendgrid:
      pattern: 'SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}'
      confidence: "high"

    mailchimp:
      pattern: '[a-f0-9]{32}-us[0-9]{1,2}'
      confidence: "high"

Database Credentials

# Database credential rules: connection URIs with embedded passwords and
# password-like variable assignments.
# Regexes are single-quoted YAML scalars; a literal ' is written as ''.
database_rules:
  connection_strings:
    postgresql:
      pattern: 'postgres(?:ql)?://[^:]+:[^@]+@[^/]+/[^\s"''`]+'
      confidence: "high"
      example: "postgresql://user:password@localhost:5432/db"

    mysql:
      pattern: 'mysql://[^:]+:[^@]+@[^/]+/[^\s"''`]+'
      confidence: "high"

    mongodb:
      pattern: 'mongodb(?:\+srv)?://[^:]+:[^@]+@[^\s"''`]+'
      confidence: "high"
      example: "mongodb+srv://user:pass@cluster.mongodb.net/db"

    # The user part may be empty (redis://:password@host).
    redis:
      pattern: 'redis://[^:]*:[^@]+@[^\s"''`]+'
      confidence: "high"

  password_patterns:
    variable_assignment:
      patterns:
        # Quoted value of at least 8 non-space characters; group 2 is the value.
        - '(?i)(password|passwd|pwd)\s*[:=]\s*["'']([^"''\s]{8,})["'']'
        # NOTE(review): the original of this pattern was garbled; this is a
        # reconstruction (any quoted, non-empty value) - confirm intent.
        - '(?i)db_pass(?:word)?\s*[:=]\s*["''`][^"''`]+["''`]'
      exclude:
        - "password123"
        - "changeme"
        - "example"
        # Escaped so that $ and braces match literally (${VAR} templates).
        - '\$\{.*\}'

Private Keys

# PEM / armored private-key headers. Each rule matches the literal BEGIN line.
private_key_rules:
  rsa:
    pattern: "-----BEGIN RSA PRIVATE KEY-----"
    confidence: "high"
    multiline: true

  openssh:
    pattern: "-----BEGIN OPENSSH PRIVATE KEY-----"
    confidence: "high"
    multiline: true

  ec:
    pattern: "-----BEGIN EC PRIVATE KEY-----"
    confidence: "high"
    multiline: true

  pgp:
    pattern: "-----BEGIN PGP PRIVATE KEY BLOCK-----"
    confidence: "high"
    multiline: true

  # Header with no algorithm name between BEGIN and PRIVATE.
  generic:
    pattern: "-----BEGIN PRIVATE KEY-----"
    confidence: "high"
    multiline: true

JWT Tokens

# JSON Web Token rules.
jwt_rules:
  jwt_token:
    # Three dot-separated base64url segments; the first two start with "eyJ".
    # Single-quoted so that \. stays a regex escape (it is not a valid escape
    # inside a double-quoted YAML scalar).
    pattern: 'eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*'
    confidence: "medium"
    validation:
      - "Decode header to verify structure"
      - "Check payload for sensitive claims"
      - "Verify not expired test token"

  # Context that raises or lowers confidence in a JWT match.
  jwt_context:
    high_confidence:
      - "In Authorization header"
      - "Named as 'token' or 'jwt'"
      - "In API response"
    low_confidence:
      - "In test files"
      - "In documentation"
      - "Expired payload"

Entropy Analysis

Shannon entropy calculation

import math
from collections import Counter

def calculate_entropy(s: str) -> float:
    """Calculate Shannon entropy of a string.

    The entropy is computed over the character frequencies of ``s`` using a
    base-2 logarithm, so the result is in bits per character.

    Args:
        s: The string to measure.

    Returns:
        The Shannon entropy of ``s``; ``0.0`` when ``s`` is empty.
    """
    # Guard first: an empty string would otherwise divide by zero below.
    if not s:
        return 0.0

    length = len(s)
    frequencies = Counter(s)

    entropy = 0.0
    for count in frequencies.values():
        probability = count / length
        entropy -= probability * math.log2(probability)

    return entropy

def is_high_entropy(s: str, threshold: float = 4.2) -> bool:
    """Check if string has high entropy (likely a secret).

    Args:
        s: Candidate string.
        threshold: Minimum Shannon entropy (as returned by
            ``calculate_entropy``) for the string to count as high entropy.

    Returns:
        ``True`` when ``s`` has at least 16 characters and its entropy is
        greater than or equal to ``threshold``; ``False`` otherwise.

    Note:
        A string of length ``n`` has entropy of at most ``log2(n)``, so with
        the default threshold of 4.2 a string needs at least 19 characters
        to be able to qualify.
    """
    # Minimum length check
    if len(s) < 16:
        return False

    # Calculate entropy
    entropy = calculate_entropy(s)

    return entropy >= threshold

Entropy thresholds by type

# Shannon entropy cut-offs keyed by the kind of secret being checked.
# NOTE(review): a hexadecimal digest uses only 16 symbols and therefore cannot
# exceed 4.0 bits per character - confirm which hash encoding 4.8 targets.
ENTROPY_THRESHOLDS = dict(
    api_key=4.2,
    password=3.5,
    token=4.5,
    hash=4.8,
)

False Positive Reduction

# Values that match a secret pattern but should be ignored.
# Regexes are single-quoted so backslash escapes reach the regex engine
# unchanged (\$ and \{ are not valid escapes in double-quoted YAML).
whitelist_patterns:
  placeholders:
    patterns:
      - 'YOUR_.*_HERE'
      - 'REPLACE_.*'
      - 'INSERT_.*'
      - 'xxx+'
      # NOTE(review): reconstructed as "run of asterisks" - confirm.
      - '\*+'
      - '<.*>'
      - '\$\{.*\}'
      - '\{\{.*\}\}'
    action: "ignore"

  test_values:
    patterns:
      - 'test.*'
      - 'fake.*'
      - 'dummy.*'
      - 'example.*'
      - 'sample.*'
      - 'mock.*'
    action: "ignore"

  common_false_positives:
    patterns:
      - '0{16,}'  # All zeros
      - '1{16,}'  # All ones
      - 'abcd.*'  # Sequential
      - "password123"
      - "changeme"
      - "secret123"
    action: "ignore"

# Paths that are skipped entirely. Globs are quoted because an unquoted
# leading * would be parsed as a YAML alias.
path_exclusions:
  directories:
    - "node_modules/"
    - "vendor/"
    - ".git/"
    - "__pycache__/"
    - "build/"
    - "dist/"
    - "coverage/"

  file_patterns:
    - "*.min.js"
    - "*.min.css"
    - "*.map"
    - "*.lock"
    - "package-lock.json"
    - "yarn.lock"

  documentation:
    - "*.md"
    - "*.rst"
    - "*.txt"
    - "docs/"
    - "examples/"

# Surrounding-code signals used to confirm or dismiss a candidate.
context_validation:
  # Value is read from the environment or configuration, not hardcoded.
  # Dots and brackets are escaped so they match literally.
  safe_patterns:
    - 'process\.env\..*'
    - 'os\.environ\..*'
    - 'System\.getenv\..*'
    - 'ENV\[''.*''\]'
    - 'config\.get.*'

  # A long quoted literal on the right-hand side of an assignment.
  suspicious_patterns:
    - "hardcoded"
    - '= "[^"]{20,}"'
    - "= '[^']{20,}'"

Rule Configuration

.secrets-detection.yml

# Scanner configuration (.secrets-detection.yml).
version: "1.0"

# Regexes are single-quoted YAML scalars; a literal ' is written as ''.
rules:
  - id: "aws-access-key"
    pattern: 'AKIA[0-9A-Z]{16}'
    severity: "critical"
    enabled: true

  - id: "generic-api-key"
    pattern: '(?i)(api[_-]?key|apikey)\s*[:=]\s*["'']?([a-zA-Z0-9_-]{20,})["'']?'
    severity: "high"
    enabled: true
    entropy_check: true
    entropy_threshold: 4.2

  # The algorithm name and the trailing " BLOCK" are optional so that the
  # plain "-----BEGIN PRIVATE KEY-----" and the PGP
  # "-----BEGIN PGP PRIVATE KEY BLOCK-----" headers are matched as well.
  - id: "private-key"
    pattern: '-----BEGIN (?:[A-Z]+ )?PRIVATE KEY(?: BLOCK)?-----'
    severity: "critical"
    enabled: true
    multiline: true

exclude:
  paths:
    - "test/"
    - "spec/"
    - "*.test.*"
    - "*.spec.*"
    - "fixtures/"
    - "mocks/"

  patterns:
    - 'EXAMPLE_.*'
    - '.*_PLACEHOLDER'
    - '\$\{.*\}'

report:
  format: "json"
  output: "secrets-report.json"
  fail_on: "critical"

performance:
  max_file_size: "10MB"
  timeout_per_file: "30s"
  parallel_files: 4

CI/CD Integration

GitHub Actions

.github/workflows/secrets-scan.yml

# GitHub Actions workflow (.github/workflows/secrets-scan.yml).
name: Secrets Detection

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]

jobs:
  scan:
    runs-on: ubuntu-latest
    steps:
      # fetch-depth: 0 checks out the full history rather than a shallow clone.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Detect secrets
        uses: gitleaks/gitleaks-action@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # NOTE(review): this action is referenced by branch (@main), not by a
      # pinned tag or commit - consider pinning.
      - name: Trufflehog scan
        uses: trufflesecurity/trufflehog@main
        with:
          path: ./
          base: ${{ github.event.repository.default_branch }}
          head: HEAD
          extra_args: --only-verified

      # Runs only when an earlier step failed.
      # NOTE(review): no step shown here writes secrets-report.json - confirm
      # which scanner is expected to produce it.
      - name: Upload results
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: secrets-report
          path: secrets-report.json

Pre-commit Hook

.pre-commit-config.yaml

# pre-commit configuration (.pre-commit-config.yaml).
repos:
  - repo: https://github.com/gitleaks/gitleaks
    rev: v8.18.0
    hooks:
      - id: gitleaks
        name: Detect secrets
        entry: gitleaks protect --verbose --redact
        language: golang
        pass_filenames: false

  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.4.0
    hooks:
      - id: detect-secrets
        args: ['--baseline', '.secrets.baseline']

Performance Optimization

# Performance guidance for the scanner.
# NOTE(review): the nesting of caching and resource_limits under
# optimization_strategies is reconstructed from a flattened original - confirm.
optimization_strategies:
  regex:
    use_atomic_groups: true
    avoid_backtracking: true
    possessive_quantifiers: true
    # "bad" nests one quantifier inside another; "good" uses an atomic group.
    example:
      bad: '(a+)+'
      good: '(?>a+)'

  scanning:
    progressive:
      - "Phase 1: High confidence patterns"
      - "Phase 2: Medium confidence + entropy"
      - "Phase 3: Low confidence heuristics"

    early_exit:
      - "Skip binary files"
      - "Skip files > 10MB"
      - "Skip whitelisted paths"

  caching:
    - "Cache compiled regexes"
    - "Cache file hashes"
    - "Incremental scanning"

  resource_limits:
    max_file_size: "10MB"
    timeout_per_file: "30s"
    max_line_length: "10000"
    parallel_workers: 4

Remediation

# What to do once a leaked secret has been confirmed.
remediation_steps:
  immediate:
    - "Revoke compromised credential"
    - "Rotate the secret"
    - "Remove from git history"
    - "Audit access logs"

  # Options for rewriting history; the entries name tools and are not
  # complete command lines.
  git_history_cleanup:
    commands:
      - "git filter-branch --force --index-filter"
      - "BFG Repo-Cleaner for large repos"
      - "git-filter-repo for complex cases"
    warning: "Requires force push, coordinate with team"

  prevention:
    - "Use environment variables"
    - "Use secrets management (Vault, AWS Secrets Manager)"
    - "Enable pre-commit hooks"
    - "Implement CI/CD scanning"
    - "Regular rotation schedule"

Лучшие практики

Precision over recall — меньше ложных срабатываний
Layered detection — комбинируй паттерны и энтропию
Context matters — учитывай окружение и naming
Whitelist carefully — документируй исключения
Scan early — pre-commit hooks + CI/CD
Rotate on detection — compromised = revoked

secrets-detection-rules

Safety Notice

Copy this and send it to your AI assistant to learn

Shannon entropy calculation

Entropy thresholds by type

.secrets-detection.yml

.github/workflows/secrets-scan.yml

.pre-commit-config.yaml

Source Transparency

Related Skills

social-media-marketing

video-marketing

frontend-design

k6-load-test