Secrets Detection Rules Expert
Expert in pattern matching, regex optimization, false positive reduction, and comprehensive coverage for detecting sensitive credentials in source code.
Core Principles
# Guiding principles behind the detection rules in this document.
detection_philosophy:
  # Prefer fewer, trustworthy findings over exhaustive matching.
  precision_over_recall:
    principle: "Minimize false positives"
    reason: "Too many alerts = alert fatigue = ignored alerts"

  # Findings are graded into three confidence tiers.
  layered_detection:
    levels:
      - "High confidence: Known patterns"
      - "Medium confidence: Entropy + context"
      - "Low confidence: Heuristics"

  # Entropy is a supporting signal, used together with naming context.
  entropy_analysis:
    purpose: "Detect random strings that might be secrets"
    threshold: "Shannon entropy > 4.2"
    context: "Combined with naming patterns"

  # Signals consulted when judging a candidate match.
  contextual_validation:
    factors:
      - "Variable/key name"
      - "File location"
      - "Surrounding code"
      - "String format"
Rule Categories
AWS Credentials
# Detection rules for AWS credentials.
aws_rules:
  access_key_id:
    pattern: "AKIA[0-9A-Z]{16}"
    confidence: "high"
    description: "AWS Access Key ID"
    example: "AKIAIOSFODNN7EXAMPLE"

  secret_access_key:
    # The pattern alone is any 40-character base64-alphabet run, so it is
    # paired with context strings.
    # NOTE(review): presumably a match is only reported when one of the
    # context_required strings appears nearby; confirm against the scanner.
    pattern: "[A-Za-z0-9/+=]{40}"
    context_required:
      - "aws_secret"
      - "secret_access_key"
      - "AWS_SECRET"
    confidence: "high"
    description: "AWS Secret Access Key"

  session_token:
    pattern: "FwoGZXIvYXdzE[A-Za-z0-9/+=]+"
    confidence: "high"
    description: "AWS Session Token"
API Keys & Tokens
# Detection rules for API keys and tokens: generic assignment/header
# patterns first, then provider-specific token formats.
api_key_rules:
  generic_api_key:
    patterns:
      # Regexes are single-quoted so backslashes stay literal; a literal
      # single quote inside the scalar is written as ''.
      - name: "api_key variable"
        # Accepts api_key, api-key and apikey; the value class allows "_"
        # like the bearer/authorization patterns below.
        regex: '(?i)(api[_-]?key|apikey)\s*[:=]\s*["'']?([a-zA-Z0-9_-]{20,})["'']?'
        confidence: "medium"
      - name: "bearer token"
        regex: '(?i)bearer\s+[a-zA-Z0-9_-]{20,}'
        confidence: "high"
      - name: "authorization header"
        regex: '(?i)authorization\s*[:=]\s*["'']?[a-zA-Z0-9_-]{20,}["'']?'
        confidence: "medium"

  service_specific:
    github:
      patterns:
        - "ghp_[a-zA-Z0-9]{36}"  # Personal access token
        - "gho_[a-zA-Z0-9]{36}"  # OAuth access token
        - "ghu_[a-zA-Z0-9]{36}"  # User-to-server token
        - "ghs_[a-zA-Z0-9]{36}"  # Server-to-server token
      confidence: "high"

    slack:
      patterns:
        - "xoxb-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}"  # Bot token
        - "xoxp-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}"  # User token
        - "xoxa-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}"  # App token
        - "xoxr-[0-9]{10,}-[0-9]{10,}-[a-zA-Z0-9]{24}"  # Refresh token
      confidence: "high"

    stripe:
      patterns:
        - "sk_live_[a-zA-Z0-9]{24,}"  # Live secret key
        - "sk_test_[a-zA-Z0-9]{24,}"  # Test secret key
        - "rk_live_[a-zA-Z0-9]{24,}"  # Restricted key
        - "pk_live_[a-zA-Z0-9]{24,}"  # Publishable key (lower risk)
      confidence: "high"

    google:
      patterns:
        - "AIza[0-9A-Za-z_-]{35}"  # API key
        - "[0-9]+-[a-z0-9_]{32}\\.apps\\.googleusercontent\\.com"  # OAuth client
      confidence: "high"

    twilio:
      patterns:
        - "SK[a-f0-9]{32}"  # API key
        - "AC[a-f0-9]{32}"  # Account SID
      confidence: "high"

    sendgrid:
      pattern: "SG\\.[a-zA-Z0-9_-]{22}\\.[a-zA-Z0-9_-]{43}"
      confidence: "high"

    mailchimp:
      pattern: "[a-f0-9]{32}-us[0-9]{1,2}"
      confidence: "high"
Database Credentials
# Detection rules for database credentials: connection URIs that embed a
# password, and password-like variable assignments.
database_rules:
  connection_strings:
    postgresql:
      pattern: 'postgres(?:ql)?://[^:]+:[^@]+@[^/]+/[^\s"''`]+'
      confidence: "high"
      example: "postgresql://user:password@localhost:5432/db"

    mysql:
      pattern: 'mysql://[^:]+:[^@]+@[^/]+/[^\s"\''`]+'
      confidence: "high"

    mongodb:
      pattern: 'mongodb(?:\+srv)?://[^:]+:[^@]+@[^\s"\''`]+'
      confidence: "high"
      example: "mongodb+srv://user:pass@cluster.mongodb.net/db"

    redis:
      # The user part may be empty (redis://:password@host).
      pattern: 'redis://[^:]*:[^@]+@[^\s"\''`]+'
      confidence: "high"

  password_patterns:
    variable_assignment:
      patterns:
        # Quoted value of at least 8 non-space characters. The closing
        # delimiter is a character class, mirroring the opening one.
        - '(?i)(password|passwd|pwd)\s*[:=]\s*["'']([^"''\s]{8,})["'']'
        # Quoted, non-empty db_pass / db_password value.
        # NOTE(review): reconstructed from a garbled pattern that matched
        # only the opening delimiter; confirm the intended value class.
        - '(?i)db_pass(?:word)?\s*[:=]\s*["''`][^"''`\s]+["''`]'
      exclude:
        - "password123"
        - "changeme"
        - "example"
        # Template placeholder such as ${DB_PASSWORD}; "$", "{" and "}" are
        # escaped so they are matched literally.
        - '\$\{.*\}'
Private Keys
# Detection rules for PEM-armored private keys. Each rule matches the
# opening BEGIN line of the block.
private_key_rules:
  rsa:
    pattern: "-----BEGIN RSA PRIVATE KEY-----"
    confidence: "high"
    multiline: true

  openssh:
    pattern: "-----BEGIN OPENSSH PRIVATE KEY-----"
    confidence: "high"
    multiline: true

  ec:
    pattern: "-----BEGIN EC PRIVATE KEY-----"
    confidence: "high"
    multiline: true

  pgp:
    pattern: "-----BEGIN PGP PRIVATE KEY BLOCK-----"
    confidence: "high"
    multiline: true

  generic:
    pattern: "-----BEGIN PRIVATE KEY-----"
    confidence: "high"
    multiline: true
JWT Tokens
# Detection rules for JSON Web Tokens.
jwt_rules:
  jwt_token:
    # Three base64url segments separated by dots. The header and payload
    # both start with "eyJ" (base64url of '{"'). Each segment class carries
    # a quantifier; without one only a single character after "eyJ" would
    # match. Single-quoted so "\." reaches the regex engine unchanged.
    pattern: 'eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*'
    confidence: "medium"
    validation:
      - "Decode header to verify structure"
      - "Check payload for sensitive claims"
      - "Verify not expired test token"

  # Context that raises or lowers confidence in a JWT match.
  jwt_context:
    high_confidence:
      - "In Authorization header"
      - "Named as 'token' or 'jwt'"
      - "In API response"
    low_confidence:
      - "In test files"
      - "In documentation"
      - "Expired payload"
Entropy Analysis
Shannon entropy calculation
import math
from collections import Counter
def calculate_entropy(s: str) -> float:
    """Calculate the Shannon entropy of a string.

    Entropy is computed over the character frequencies of ``s`` using
    base-2 logarithms, so the result is in bits per character.

    Args:
        s: The string to measure.

    Returns:
        The entropy of ``s``; ``0.0`` for an empty string.
    """
    # Guard first: an empty string would otherwise divide by zero below.
    if not s:
        return 0.0

    length = len(s)
    frequencies = Counter(s)

    entropy = 0.0
    for count in frequencies.values():
        probability = count / length
        # log2(probability) is <= 0, so subtracting accumulates a
        # non-negative total.
        entropy -= probability * math.log2(probability)
    return entropy
def is_high_entropy(s: str, threshold: float = 4.2, min_length: int = 16) -> bool:
    """Check whether a string has high entropy (likely a secret).

    Args:
        s: Candidate string.
        threshold: Minimum Shannon entropy, as returned by
            ``calculate_entropy``, for the string to count as high entropy.
            The comparison is inclusive.
        min_length: Strings shorter than this are rejected without computing
            entropy. Defaults to 16, the cut-off that was previously
            hard-coded.

    Returns:
        True when ``len(s) >= min_length`` and the entropy of ``s`` is at
        least ``threshold``; False otherwise.
    """
    # Minimum length check. A string with k distinct characters has entropy
    # of at most log2(k), so with the default threshold of 4.2 a string
    # needs at least 19 distinct characters to pass (2 ** 4.2 is about 18.4).
    if len(s) < min_length:
        return False

    return calculate_entropy(s) >= threshold
Entropy thresholds by type
# Entropy threshold to apply for each kind of candidate secret.
# NOTE(review): a hexadecimal string uses at most 16 distinct characters, so
# its per-character Shannon entropy cannot exceed log2(16) = 4.0; confirm
# that 4.8 is the intended value for "hash".
ENTROPY_THRESHOLDS = dict(
    api_key=4.2,
    password=3.5,
    token=4.5,
    hash=4.8,
)
False Positive Reduction
# Values that should not be reported even when a detection rule matches.
# Patterns are single-quoted so regex backslashes stay literal.
whitelist_patterns:
  # Template placeholders left for the reader to fill in.
  placeholders:
    patterns:
      - 'YOUR_.*_HERE'
      - 'REPLACE_.*'
      - 'INSERT_.*'
      - 'xxx+'
      - '\*+'
      - '<.*>'
      - '\$\{.*\}'
      - '\{\{.*\}\}'
    action: "ignore"

  # Values that announce themselves as test data.
  test_values:
    patterns:
      - 'test.*'
      - 'fake.*'
      - 'dummy.*'
      - 'example.*'
      - 'sample.*'
      - 'mock.*'
    action: "ignore"

  common_false_positives:
    patterns:
      - "0{16,}"  # All zeros
      - "1{16,}"  # All ones
      - "abcd.*"  # Sequential
      - "password123"
      - "changeme"
      - "secret123"
    action: "ignore"
# Paths that are skipped by the scanner.
# Globs are quoted because a plain scalar starting with "*" would be read
# as a YAML alias.
path_exclusions:
  directories:
    - "node_modules/"
    - "vendor/"
    - ".git/"
    - "__pycache__/"
    - "build/"
    - "dist/"
    - "coverage/"

  file_patterns:
    - "*.min.js"
    - "*.min.css"
    - "*.map"
    - "*.lock"
    - "package-lock.json"
    - "yarn.lock"

  documentation:
    - "*.md"
    - "*.rst"
    - "*.txt"
    - "docs/"
    - "examples/"
# Surrounding-code patterns used to judge a candidate match.
context_validation:
  # The value is read from the environment or configuration rather than
  # hard-coded.
  # NOTE(review): if these are evaluated as regexes, "." and "[" are
  # unescaped metacharacters; confirm the matching semantics.
  safe_patterns:
    - "process.env.*"
    - "os.environ.*"
    - "System.getenv.*"
    - "ENV['.*']"
    - "config.get.*"

  # A long quoted literal assigned directly in code.
  suspicious_patterns:
    - "hardcoded"
    - '= "[^"]{20,}"'
    - "= '[^']{20,}'"
Rule Configuration
.secrets-detection.yml
version: "1.0"

# Detection rules. Regexes containing backslashes or quotes are
# single-quoted so backslashes stay literal; a literal single quote inside
# such a scalar is written as ''.
rules:
  - id: "aws-access-key"
    pattern: "AKIA[0-9A-Z]{16}"
    severity: "critical"
    enabled: true

  - id: "generic-api-key"
    # Accepts api_key, api-key and apikey; the value class allows "_".
    pattern: '(?i)(api[_-]?key|apikey)\s*[:=]\s*["'']?([a-zA-Z0-9_-]{20,})["'']?'
    severity: "high"
    enabled: true
    entropy_check: true
    entropy_threshold: 4.2

  - id: "private-key"
    # Zero or more uppercase label words before "PRIVATE KEY" and an
    # optional trailing " BLOCK". This covers the unlabelled
    # "-----BEGIN PRIVATE KEY-----" header and the PGP
    # "-----BEGIN PGP PRIVATE KEY BLOCK-----" header as well as
    # RSA / EC / OPENSSH.
    pattern: '-----BEGIN (?:[A-Z0-9]+ )*PRIVATE KEY(?: BLOCK)?-----'
    severity: "critical"
    enabled: true
    multiline: true
# Paths and value patterns that are never reported.
exclude:
  paths:
    - "test/"
    - "spec/"
    - "*.test.*"
    - "*.spec.*"
    - "fixtures/"
    - "mocks/"

  # Single-quoted so regex backslashes stay literal.
  patterns:
    - 'EXAMPLE_.*'
    - '.*_PLACEHOLDER'
    - '\$\{.*\}'
# Where findings are written, and the severity named in fail_on.
report:
  format: "json"
  output: "secrets-report.json"
  fail_on: "critical"

# Resource limits for a scan.
performance:
  max_file_size: "10MB"
  timeout_per_file: "30s"
  parallel_files: 4
CI/CD Integration
GitHub Actions
.github/workflows/secrets-scan.yml
name: Secrets Detection

on:
  push:
    branches: [main, develop]
  pull_request:
    branches: [main]

jobs:
  scan:
    runs-on: ubuntu-latest
    steps:
      # fetch-depth: 0 requests the full commit history instead of a
      # shallow clone.
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: Detect secrets
        uses: gitleaks/gitleaks-action@v2
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

      # NOTE(review): "@main" tracks a moving branch; consider pinning to a
      # release tag or commit SHA.
      # NOTE(review): on a push to the default branch, base and head may
      # resolve to the same commit; confirm the scan range is what you want.
      - name: Trufflehog scan
        uses: trufflesecurity/trufflehog@main
        with:
          path: ./
          base: ${{ github.event.repository.default_branch }}
          head: HEAD
          extra_args: --only-verified

      # NOTE(review): no step in this workflow visibly writes
      # secrets-report.json; confirm which tool produces it.
      - name: Upload results
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: secrets-report
          path: secrets-report.json
Pre-commit Hook
.pre-commit-config.yaml
# Two independent hooks: gitleaks and detect-secrets.
repos:
  - repo: https://github.com/gitleaks/gitleaks
    rev: v8.18.0
    hooks:
      - id: gitleaks
        name: Detect secrets
        entry: gitleaks protect --verbose --redact
        language: golang
        pass_filenames: false

  - repo: https://github.com/Yelp/detect-secrets
    rev: v1.4.0
    hooks:
      - id: detect-secrets
        args: ['--baseline', '.secrets.baseline']
Performance Optimization
# Ways to keep scanning fast.
optimization_strategies:
  regex:
    use_atomic_groups: true
    avoid_backtracking: true
    possessive_quantifiers: true
    # A nested quantifier versus the atomic-group form of the same match.
    example:
      bad: "(a+)+"
      good: "(?>a+)"

  scanning:
    # Phases are listed from highest to lowest confidence.
    progressive:
      - "Phase 1: High confidence patterns"
      - "Phase 2: Medium confidence + entropy"
      - "Phase 3: Low confidence heuristics"
    early_exit:
      - "Skip binary files"
      - "Skip files > 10MB"
      - "Skip whitelisted paths"

  caching:
    - "Cache compiled regexes"
    - "Cache file hashes"
    - "Incremental scanning"

  resource_limits:
    max_file_size: "10MB"
    timeout_per_file: "30s"
    max_line_length: "10000"
    parallel_workers: 4
Remediation
# What to do once a secret has been found in the repository.
remediation_steps:
  immediate:
    - "Revoke compromised credential"
    - "Rotate the secret"
    - "Remove from git history"
    - "Audit access logs"

  # Tools for rewriting history.
  git_history_cleanup:
    commands:
      - "git filter-branch --force --index-filter"
      - "BFG Repo-Cleaner for large repos"
      - "git-filter-repo for complex cases"
    warning: "Requires force push, coordinate with team"

  prevention:
    - "Use environment variables"
    - "Use secrets management (Vault, AWS Secrets Manager)"
    - "Enable pre-commit hooks"
    - "Implement CI/CD scanning"
    - "Regular rotation schedule"
Лучшие практики
- Precision over recall — меньше ложных срабатываний
- Layered detection — комбинируй паттерны и энтропию
- Context matters — учитывай окружение и naming
- Whitelist carefully — документируй исключения
- Scan early — pre-commit hooks + CI/CD
- Rotate on detection — compromised = revoked