feat(docs): add LLM Handbook + Google Whitepaper integration v3.3.0
Advanced Guardrails: - prompt-injection-detector.sh (PreToolUse) - output-validator.sh (PostToolUse heuristics) - claudemd-scanner.sh (SessionStart injection detection) - output-secrets-scanner.sh (PostToolUse secrets leak prevention) Observability & Monitoring: - session-logger.sh (JSONL activity logging) - session-stats.sh (cost tracking & analysis) - guide/observability.md (full documentation) LLM-as-a-Judge Evaluation: - output-evaluator.md agent (Haiku) - /validate-changes command - pre-commit-evaluator.sh (opt-in git hook) Google Agent Whitepaper Integration: - Context Triage Guide (Section 2.2.4) - CLAUDE.md Injection Warning (Section 3.1.3) - Agent Validation Checklist (Section 4.2.4) - MCP Security: Tool Shadowing & Confused Deputy (Section 8.6) - Session vs Memory patterns (Section 3.3.3) Stats: 10 new files, 8 modified, 5 new guide sections Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
19110eba22
commit
8a4d116e2e
17 changed files with 2188 additions and 3 deletions
98
examples/hooks/bash/claudemd-scanner.sh
Normal file
98
examples/hooks/bash/claudemd-scanner.sh
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# CLAUDE.md Injection Scanner Hook
|
||||
# =============================================================================
|
||||
# Event: SessionStart (runs when Claude Code session begins)
|
||||
# Purpose: Detect potential prompt injection attacks in CLAUDE.md files
|
||||
#
|
||||
# Installation:
|
||||
# Add to .claude/settings.json:
|
||||
# {
|
||||
# "hooks": {
|
||||
# "SessionStart": [{
|
||||
# "matcher": "",
|
||||
# "hooks": ["bash examples/hooks/bash/claudemd-scanner.sh"]
|
||||
# }]
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# What it detects:
|
||||
# - "ignore previous instructions" patterns (common injection technique)
|
||||
# - Shell command execution attempts (curl|bash, wget|sh, eval)
|
||||
# - Base64 encoded content (potential obfuscation)
|
||||
# - Suspicious HTML comments that might hide instructions
|
||||
# =============================================================================
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Suspicious patterns: extended regular expressions that scan_file matches
# case-insensitively with `grep -iE`. They are single-quoted so the regex text
# reaches grep exactly as written.
SUSPICIOUS_PATTERNS=(
  # Instruction-override phrasing
  'ignore.*previous.*instruction'
  'ignore.*all.*instruction'
  'disregard.*instruction'
  'forget.*instruction'
  'new.*instruction.*follow'
  # Download piped into a shell ("\|" is a literal pipe in ERE)
  'curl.*\|.*bash'
  'curl.*\|.*sh'
  'wget.*\|.*bash'
  'wget.*\|.*sh'
  'eval\s*\('
  'base64.*decode'
  # Command substitution that fetches remote content. The "$" has to stay
  # escaped: inside double quotes the shell removes the backslash, and a bare
  # "$" is an end-of-line anchor that can never be followed by "(".
  '\$\(.*curl'
  '\$\(.*wget'
  # HTML comments that may hide directives
  '<!--.*ignore'
  '<!--.*instruction'
)
|
||||
|
||||
# Findings accumulated by scan_file; one human-readable string per finding.
WARNINGS=()

#######################################
# Scan one file for prompt-injection indicators.
# Globals:   SUSPICIOUS_PATTERNS (read), WARNINGS (appended)
# Arguments: $1 - path to scan; a missing or non-regular path is ignored
# Returns:   0 always (findings are reported through WARNINGS)
#######################################
scan_file() {
  local file="$1"
  local pattern non_ascii_lines

  if [[ ! -f "$file" ]]; then
    return 0
  fi

  for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
    # -e / -- keep a pattern or path with a leading "-" from being parsed as
    # a grep option.
    if grep -qiE -e "$pattern" -- "$file" 2>/dev/null; then
      WARNINGS+=("Suspicious pattern in $file: matches '$pattern'")
    fi
  done

  # Check for very long single lines (potential obfuscation).
  # awk decides by exit status instead of feeding `grep -q`: under `pipefail`
  # an early-exiting grep can kill awk with SIGPIPE, which would turn a
  # positive result into a failed (false) pipeline.
  if awk 'length > 500 { found = 1; exit } END { exit !found }' "$file"; then
    WARNINGS+=("Warning: $file contains very long lines (potential obfuscation)")
  fi

  # Check for non-ASCII characters (potential homoglyph attack) on lines that
  # also carry a sensitive keyword. A byte-range match in the C locale avoids
  # `grep -P`, which is not available in every grep; -a forces text mode.
  non_ascii_lines=$(LC_ALL=C grep -a -e $'[\x80-\xff]' -- "$file" 2>/dev/null || true)
  if [[ -n "$non_ascii_lines" ]] \
    && grep -qiE -e "instruction|ignore|run|execute" <<<"$non_ascii_lines"; then
    WARNINGS+=("Warning: $file contains non-ASCII characters near sensitive keywords")
  fi

  return 0
}
|
||||
|
||||
# Scan the project-level CLAUDE.md, then every Markdown file directly inside
# .claude/ that might be loaded. The glob already covers .claude/CLAUDE.md, so
# that file is not scanned separately; scanning it twice would report every
# finding in it twice.
scan_file "CLAUDE.md"

if [[ -d ".claude" ]]; then
  for md_file in .claude/*.md; do
    # With no match the glob stays literal (".claude/*.md"); skip it.
    if [[ -f "$md_file" ]]; then
      scan_file "$md_file"
    fi
  done
fi
|
||||
|
||||
#######################################
# Escape a string for embedding inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

#######################################
# Print the hook's JSON response for a list of findings.
# The findings quote regex patterns (containing "\|", "\s", "\(") and file
# names, so the text is JSON-escaped before being embedded; interpolating it
# raw yields invalid JSON.
# Arguments: one finding per argument
# Outputs:   a single-line {"systemMessage": "..."} object on stdout
#######################################
emit_security_warning() {
  local text finding
  text=$'SECURITY WARNING - Suspicious content detected:\n'
  for finding in "$@"; do
    text+="- ${finding}"$'\n'
  done
  text+=$'\n'"Review these files before proceeding. See: https://github.com/FlorianBruniaux/claude-code-ultimate-guide/guide/ultimate-guide.md#security-warning-claudemd-injection"
  printf '{"systemMessage": "%s"}\n' "$(json_escape "$text")"
}

# Output warnings if any found
if [[ ${#WARNINGS[@]} -gt 0 ]]; then
  emit_security_warning "${WARNINGS[@]}"
fi
|
||||
|
||||
# Always exit 0 to not block session (just warn)
|
||||
exit 0
|
||||
97
examples/hooks/bash/output-secrets-scanner.sh
Normal file
97
examples/hooks/bash/output-secrets-scanner.sh
Normal file
|
|
@ -0,0 +1,97 @@
|
|||
#!/bin/bash
|
||||
# =============================================================================
|
||||
# Output Secrets Scanner Hook
|
||||
# =============================================================================
|
||||
# Event: PostToolUse (runs after each tool execution)
|
||||
# Purpose: Detect secrets that might leak in tool outputs
|
||||
#
|
||||
# This complements security-check.sh (which scans inputs). This hook scans
|
||||
# outputs to catch secrets that Claude might inadvertently expose.
|
||||
#
|
||||
# Installation:
|
||||
# Add to .claude/settings.json:
|
||||
# {
|
||||
# "hooks": {
|
||||
# "PostToolUse": [{
|
||||
# "matcher": "",
|
||||
# "hooks": ["bash examples/hooks/bash/output-secrets-scanner.sh"]
|
||||
# }]
|
||||
# }
|
||||
# }
|
||||
#
|
||||
# What it detects:
|
||||
# - API keys (OpenAI, Anthropic, AWS, GCP, Azure, Stripe, etc.)
|
||||
# - Private keys and certificates
|
||||
# - Database connection strings with passwords
|
||||
# - GitHub/GitLab tokens
|
||||
# - JWT tokens
|
||||
# =============================================================================
|
||||
|
||||
set -euo pipefail
|
||||
|
||||
# Slurp the complete hook payload (JSON) from stdin.
INPUT="$(cat)"

# Pull the tool output out of the payload; either field name is accepted.
# If jq fails (for example on malformed JSON) an empty line is emitted
# instead, which leaves TOOL_OUTPUT empty.
TOOL_OUTPUT="$(
  echo "$INPUT" | jq -r '.tool_output // .output // ""' 2>/dev/null || echo ""
)"

# Nothing to inspect: leave quietly.
[[ -n "$TOOL_OUTPUT" ]] || exit 0
|
||||
|
||||
# Secret patterns to detect: label -> extended regular expression.
# The patterns are applied with `grep -iE`, so matching is case-insensitive.
declare -A SECRET_PATTERNS=(
  # API Keys
  ["OpenAI API Key"]="sk-[a-zA-Z0-9]{20,}"
  # The part after "sk-ant-" contains "-" and "_" (e.g. "api03-..."), so both
  # must be allowed or such keys are never matched.
  ["Anthropic API Key"]="sk-ant-[a-zA-Z0-9_-]{20,}"
  ["AWS Access Key"]="AKIA[0-9A-Z]{16}"
  ["AWS Secret Key"]="[0-9a-zA-Z/+]{40}"
  ["GCP API Key"]="AIza[0-9A-Za-z_-]{35}"
  # NOTE(review): matches any run of 32+ alphanumerics (hashes, long
  # identifiers); expect false positives - confirm this breadth is intended.
  ["Azure Key"]="[a-zA-Z0-9]{32,}"
  ["Stripe Key"]="(sk|pk)_(live|test)_[0-9a-zA-Z]{24,}"
  ["Twilio Key"]="SK[a-f0-9]{32}"
  ["SendGrid Key"]="SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}"

  # Tokens
  ["GitHub Token"]="(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}"
  ["GitLab Token"]="glpat-[a-zA-Z0-9_-]{20,}"
  ["NPM Token"]="npm_[a-zA-Z0-9]{36}"
  ["PyPI Token"]="pypi-[a-zA-Z0-9_-]{50,}"
  ["JWT Token"]="eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*"

  # Private Keys
  ["Private Key"]="-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"
  ["PGP Private Key"]="-----BEGIN PGP PRIVATE KEY BLOCK-----"

  # Database
  ["Database URL with Password"]="(postgres|mysql|mongodb)://[^:]+:[^@]+@"
  ["Redis URL with Password"]="redis://:[^@]+@"

  # Generic
  # [[:space:]] is used instead of "\s": inside a bracket expression "\s"
  # means the two literal characters "\" and "s", which made the value part
  # reject any secret containing the letter "s".
  ["Generic API Key"]="(api[_-]?key|apikey|api[_-]?secret)['\"]?[[:space:]]*[:=][[:space:]]*['\"]?[a-zA-Z0-9_-]{20,}"
  ["Generic Secret"]="(secret|password|passwd|pwd)['\"]?[[:space:]]*[:=][[:space:]]*['\"]?[^[:space:]'\"]{8,}"
)
|
||||
|
||||
# Labels of every secret type found in the tool output.
DETECTED_SECRETS=()

#######################################
# Record every secret type whose pattern matches the tool output.
# Globals:   SECRET_PATTERNS (read), TOOL_OUTPUT (read),
#            DETECTED_SECRETS (appended)
# Returns:   0 always
#######################################
detect_secrets() {
  local secret_type pattern
  for secret_type in "${!SECRET_PATTERNS[@]}"; do
    pattern="${SECRET_PATTERNS[$secret_type]}"
    # -e: the private-key patterns start with "-----BEGIN"; without it grep
    #     parses them as options, errors out, and the key is never reported.
    # here-string: piping `echo` into `grep -q` can fail under `pipefail`
    #     when grep exits early and echo is killed by SIGPIPE, turning a
    #     match on large output into a miss.
    if grep -qiE -e "$pattern" <<<"$TOOL_OUTPUT" 2>/dev/null; then
      DETECTED_SECRETS+=("$secret_type")
    fi
  done
  return 0
}

# Check each pattern
detect_secrets
|
||||
|
||||
# Report findings through systemMessage; this hook only advises.
if (( ${#DETECTED_SECRETS[@]} > 0 )); then
  # Join the labels with ", ": append the separator after every label, then
  # drop the trailing one.
  printf -v SECRETS_LIST '%s, ' "${DETECTED_SECRETS[@]}"
  SECRETS_LIST=${SECRETS_LIST%, }

  WARNING_MSG="SECRET LEAK WARNING: Potential secrets detected in output: $SECRETS_LIST. Do NOT commit or share this output. Consider using environment variables or a secrets manager."

  printf '{"systemMessage": "%s"}\n' "$WARNING_MSG"
fi
|
||||
|
||||
# Always exit 0 (warn, don't block)
|
||||
exit 0
|
||||
184
examples/hooks/bash/output-validator.sh
Executable file
184
examples/hooks/bash/output-validator.sh
Executable file
|
|
@ -0,0 +1,184 @@
|
|||
#!/bin/bash
|
||||
# Hook: PostToolUse - Validate Claude's outputs for quality issues
|
||||
# Exit 0 = allow (always), but emit systemMessage warnings
|
||||
#
|
||||
# This hook performs heuristic validation of Claude's outputs to detect:
|
||||
# - Potential hallucinations (fabricated paths, functions)
|
||||
# - Sensitive data leakage in outputs
|
||||
# - High uncertainty indicators
|
||||
#
|
||||
# This is a lightweight heuristic check, not a full LLM evaluation.
|
||||
# For deeper validation, use the output-evaluator agent.
|
||||
#
|
||||
# Place in: .claude/hooks/output-validator.sh
|
||||
# Register in: .claude/settings.json under PostToolUse event
|
||||
|
||||
set -e
|
||||
|
||||
# Read JSON from stdin
INPUT=$(cat)

# This hook is documented as "Exit 0 = allow (always)". Under `set -e` a jq
# failure (malformed payload, jq not installed) would abort with a non-zero
# status, so failures degrade to empty values instead.
# NOTE(review): the payload field is assumed to be `tool_output` - confirm
# against the hook input actually delivered for PostToolUse.
TOOL_NAME=$(jq -r '.tool_name // empty' <<<"$INPUT" 2>/dev/null) || TOOL_NAME=""
TOOL_OUTPUT=$(jq -r '.tool_output // empty' <<<"$INPUT" 2>/dev/null) || TOOL_OUTPUT=""

# Only validate tools that produce code/content outputs
case "$TOOL_NAME" in
  Edit | Write | Bash) ;;
  *) exit 0 ;;
esac
|
||||
|
||||
# Collected findings, one message per entry.
WARNINGS=()

# Append "<label>: '<needle>'" to WARNINGS for every needle ($2...) that occurs
# verbatim (case-sensitive substring) in TOOL_OUTPUT.
warn_on_literals() {
  local label=$1 needle
  shift
  for needle in "$@"; do
    if [[ "$TOOL_OUTPUT" == *"$needle"* ]]; then
      WARNINGS+=("$label: '$needle'")
    fi
  done
}

# === FABRICATED FILE PATHS ===
# Paths that look like documentation examples rather than real locations
# (a common hallucination pattern).
SUSPICIOUS_PATHS=(
  '/path/to/'
  '/your/project/'
  '/example/'
  '/foo/bar/'
  '/my/app/'
  '/user/project/'
  'C:\Users\User\'
  'C:\path\to\'
)
warn_on_literals "Suspicious placeholder path detected" "${SUSPICIOUS_PATHS[@]}"

# === PLACEHOLDER CONTENT ===
# Markers and dummy values that should not reach production code.
PLACEHOLDER_PATTERNS=(
  'TODO:'
  'FIXME:'
  'XXX:'
  'HACK:'
  'your-api-key'
  'your_api_key'
  'YOUR_API_KEY'
  'sk-...'
  'pk_test_'
  'pk_live_'
  'api_key_here'
  'replace_with'
  'insert_your'
  'placeholder'
  'example.com'
  'foo@bar.com'
  'test@test.com'
)
warn_on_literals "Placeholder content detected" "${PLACEHOLDER_PATTERNS[@]}"
|
||||
|
||||
# === SENSITIVE DATA LEAKAGE ===
# Detect potential secrets in output (could indicate data exposure).
# Each entry is an extended regular expression (case-sensitive).
SECRET_PATTERNS=(
  # AWS
  'AKIA[0-9A-Z]{16}'
  # Generic API keys (long hex strings)
  '[a-f0-9]{32,}'
  # Private keys
  '-----BEGIN.*PRIVATE KEY-----'
  '-----BEGIN RSA'
  '-----BEGIN EC'
  # JWT tokens
  'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.'
  # Password assignments with a quoted value. Quotes are written literally:
  # grep -E has no "\x27" hex escape, so the earlier form never matched a
  # single quote. [[:space:]] replaces the non-POSIX "\s".
  "password[\"']?[[:space:]]*[=:][[:space:]]*[\"'][^\"']{8,}"
)

# Succeeds when TOOL_OUTPUT contains a match for the extended regex in $1.
# -e stops a pattern that begins with "-" (the "-----BEGIN" entries) from
# being parsed as grep options, which previously made those checks fail
# silently.
output_matches_regex() {
  grep -qE -e "$1" <<<"$TOOL_OUTPUT" 2>/dev/null
}

for pattern in "${SECRET_PATTERNS[@]}"; do
  if output_matches_regex "$pattern"; then
    WARNINGS+=("Potential sensitive data in output (pattern: ${pattern:0:20}...)")
  fi
done
|
||||
|
||||
# === UNCERTAINTY INDICATORS ===
# Detect high uncertainty language that might indicate guessing
UNCERTAINTY_PATTERNS=(
  "I'm not sure"
  "I think it might"
  "probably"
  "possibly"
  "might be"
  "could be"
  "I believe"
  "I assume"
  "I guess"
  "if I recall"
  "from memory"
  "I don't have access"
  "I cannot verify"
)

#######################################
# Count how many UNCERTAINTY_PATTERNS phrases occur in a text, ignoring case.
# Each phrase counts at most once.
# Globals:   UNCERTAINTY_PATTERNS (read)
# Arguments: $1 - text to inspect
# Outputs:   the count on stdout
#######################################
count_uncertainty_indicators() {
  local haystack phrase count=0
  haystack=$(printf '%s' "$1" | tr '[:upper:]' '[:lower:]')
  for phrase in "${UNCERTAINTY_PATTERNS[@]}"; do
    phrase=$(printf '%s' "$phrase" | tr '[:upper:]' '[:lower:]')
    if [[ "$haystack" == *"$phrase"* ]]; then
      # Plain assignment on purpose: `((count++))` returns status 1 when the
      # old value is 0, which aborts the whole script under `set -e` on the
      # very first hit.
      count=$((count + 1))
    fi
  done
  printf '%s\n' "$count"
}

TOOL_OUTPUT_LOWER=$(echo "$TOOL_OUTPUT" | tr '[:upper:]' '[:lower:]')
UNCERTAINTY_COUNT=$(count_uncertainty_indicators "$TOOL_OUTPUT")

if [[ $UNCERTAINTY_COUNT -ge 3 ]]; then
  WARNINGS+=("High uncertainty detected ($UNCERTAINTY_COUNT indicators) - verify output accuracy")
fi
|
||||
|
||||
# === INCOMPLETE IMPLEMENTATIONS ===
# Markers of unfinished code. Entries are extended regular expressions and are
# matched without regard to case.
INCOMPLETE_PATTERNS=(
  'not implemented'
  'NotImplementedError'
  'throw new Error.*implement'
  '// TODO'
  '# TODO'
  'pass # '
  'raise NotImplemented'
  'undefined'
)

for pattern in "${INCOMPLETE_PATTERNS[@]}"; do
  echo "$TOOL_OUTPUT" | grep -qiE "$pattern" 2>/dev/null || continue
  WARNINGS+=("Incomplete implementation detected: '$pattern'")
done

# === HALLUCINATION INDICATORS ===
# Phrases that cite a source without naming it. These are compared as literal,
# case-sensitive substrings.
HALLUCINATION_PATTERNS=(
  'According to the documentation'
  'As stated in'
  'The official guide says'
  'Based on the API reference'
)

for pattern in "${HALLUCINATION_PATTERNS[@]}"; do
  case "$TOOL_OUTPUT" in
    *"$pattern"*)
      WARNINGS+=("Unverified reference claim: '$pattern' - verify source")
      ;;
  esac
done
|
||||
|
||||
# === OUTPUT WARNINGS ===

#######################################
# Escape a string for embedding inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab; other
# control characters are passed through unchanged.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

#######################################
# Print the systemMessage JSON for a list of warnings.
# Warnings quote the matched patterns (Windows paths with backslashes, regex
# fragments containing double quotes), so the text is escaped before being
# embedded; raw interpolation produces invalid JSON.
# Arguments: one warning per argument
# Outputs:   a single-line {"systemMessage": "..."} object on stdout
#######################################
emit_validation_warnings() {
  local message warn
  message=$'Output validation warnings:\n'
  for warn in "$@"; do
    message+=" - ${warn}"$'\n'
  done
  message+=$'\n'"Review output carefully before accepting."
  # Emit as systemMessage (warning, not blocking)
  printf '{"systemMessage": "%s"}\n' "$(json_escape "$message")"
}

if [[ ${#WARNINGS[@]} -gt 0 ]]; then
  emit_validation_warnings "${WARNINGS[@]}"
fi
|
||||
|
||||
# Always allow (this hook warns, doesn't block)
|
||||
exit 0
|
||||
207
examples/hooks/bash/pre-commit-evaluator.sh
Executable file
207
examples/hooks/bash/pre-commit-evaluator.sh
Executable file
|
|
@ -0,0 +1,207 @@
|
|||
#!/bin/bash
|
||||
# Git pre-commit hook: LLM-as-a-Judge evaluation before commit
|
||||
#
|
||||
# This hook uses Claude to evaluate staged changes before allowing a commit.
|
||||
# It's an OPT-IN feature due to API costs and latency.
|
||||
#
|
||||
# COST WARNING: Each commit evaluation costs ~$0.01-0.05 (Haiku model)
|
||||
#
|
||||
# Installation:
|
||||
# 1. Copy to your repo: cp pre-commit-evaluator.sh .git/hooks/pre-commit
|
||||
# 2. Make executable: chmod +x .git/hooks/pre-commit
|
||||
# 3. Set required env var: export CLAUDE_PRECOMMIT_EVAL=1
|
||||
#
|
||||
# Environment Variables:
|
||||
# CLAUDE_PRECOMMIT_EVAL - Set to "1" to enable (default: disabled)
|
||||
# CLAUDE_EVAL_MODEL - Model to use (default: haiku)
|
||||
# CLAUDE_EVAL_THRESHOLD - Minimum score to pass (default: 7)
|
||||
# CLAUDE_EVAL_SKIP_PATHS - Colon-separated paths to skip (e.g., "docs:*.md")
|
||||
#
|
||||
# Bypass for single commit:
|
||||
# CLAUDE_SKIP_EVAL=1 git commit -m "message"
|
||||
# or
|
||||
# git commit --no-verify -m "message"
|
||||
|
||||
set -e
|
||||
|
||||
# Opt-in gate: do nothing unless evaluation was explicitly switched on.
[[ "${CLAUDE_PRECOMMIT_EVAL:-0}" == "1" ]] || exit 0

# One-off bypass requested through the environment.
if [[ "${CLAUDE_SKIP_EVAL:-0}" == "1" ]]; then
  echo "Skipping LLM evaluation (CLAUDE_SKIP_EVAL=1)"
  exit 0
fi

# Settings, each overridable from the environment.
MODEL="${CLAUDE_EVAL_MODEL:-haiku}"         # model passed to the claude CLI
THRESHOLD="${CLAUDE_EVAL_THRESHOLD:-7}"     # minimum score quoted in the prompt
SKIP_PATHS="${CLAUDE_EVAL_SKIP_PATHS:-}"    # colon-separated skip patterns

# ANSI color sequences; stored unexpanded and interpreted by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'
|
||||
|
||||
# Names of the files staged for this commit, one per line.
STAGED_FILES="$(git diff --cached --name-only)"

# Nothing staged means nothing to evaluate.
[[ -n "$STAGED_FILES" ]] || exit 0
|
||||
|
||||
#######################################
# Drop paths that match any skip pattern.
# A path is skipped when it matches the pattern as a glob (e.g. "*.md"), or
# when the pattern names a directory the path lives under (e.g. "docs" skips
# "docs/guide.md"), as the CLAUDE_EVAL_SKIP_PATHS example "docs:*.md" implies.
# Paths are handled line by line, so names containing spaces stay intact.
# Arguments: skip patterns, one per argument (empty patterns are ignored)
# Inputs:    newline-separated paths on stdin
# Outputs:   the surviving paths on stdout, one per line
# Returns:   0 always
#######################################
filter_skipped_paths() {
  local file pattern skip
  while IFS= read -r file || [[ -n "$file" ]]; do
    [[ -n "$file" ]] || continue
    skip=false
    for pattern in "$@"; do
      [[ -n "$pattern" ]] || continue
      # $pattern is intentionally unquoted in the first test so it acts as a
      # glob; the second test treats it as a literal directory prefix.
      if [[ "$file" == $pattern || "$file" == "${pattern%/}"/* ]]; then
        skip=true
        break
      fi
    done
    if [[ "$skip" == "false" ]]; then
      printf '%s\n' "$file"
    fi
  done
  return 0
}

# Filter out skipped paths
if [[ -n "$SKIP_PATHS" ]]; then
  IFS=':' read -ra SKIP_ARRAY <<< "$SKIP_PATHS"
  STAGED_FILES=$(filter_skipped_paths "${SKIP_ARRAY[@]}" <<< "$STAGED_FILES")
fi
|
||||
|
||||
# Every staged file was filtered out: nothing left to evaluate.
[[ -n "$STAGED_FILES" ]] || exit 0

# Number of whitespace-separated entries in the staged list.
FILE_COUNT="$(wc -w <<< "$STAGED_FILES" | tr -d ' ')"

# Tell the user what is about to happen and what it roughly costs.
echo -e "${CYAN}Evaluating $FILE_COUNT staged file(s) with Claude ($MODEL)...${NC}"
echo -e "${YELLOW}Cost: ~\$0.01-0.05 per evaluation${NC}"
echo ""
|
||||
|
||||
# Staged changes as a unified diff.
DIFF="$(git diff --cached)"

# Cap the amount of text sent to the model (to control costs).
MAX_CHARS=50000
if (( ${#DIFF} > MAX_CHARS )); then
  echo -e "${YELLOW}Warning: Diff truncated to ${MAX_CHARS} chars for cost control${NC}"
  DIFF="${DIFF:0:MAX_CHARS}"$'\n\n'"[TRUNCATED - diff exceeded ${MAX_CHARS} characters]"
fi

# Evaluation prompt. The here-document expands $THRESHOLD, the arithmetic
# expression and $DIFF; double quotes inside it are literal.
PROMPT="$(
  cat <<EOF
You are a code quality evaluator. Analyze this git diff and provide a JSON evaluation.

Score each criterion from 0-10:
- correctness: Does the code work correctly?
- completeness: Is the implementation complete (no TODOs, stubs)?
- safety: No secrets, no security issues?

Respond ONLY with valid JSON in this format:
{
"verdict": "APPROVE" or "NEEDS_REVIEW" or "REJECT",
"scores": {"correctness": N, "completeness": N, "safety": N},
"issues": [{"severity": "high/medium/low", "description": "..."}],
"summary": "One sentence summary"
}

Rules:
- APPROVE if all scores >= $THRESHOLD and no high-severity issues
- NEEDS_REVIEW if any score is 5-$((THRESHOLD-1)) or medium issues exist
- REJECT if any score < 5 or high-severity security issues

Git diff to evaluate:

$DIFF
EOF
)"
|
||||
|
||||
# The evaluation needs the `claude` CLI (installed and authenticated).
command -v claude > /dev/null 2>&1 || {
  echo -e "${RED}Error: 'claude' CLI not found. Install Claude Code first.${NC}"
  exit 1
}

# Send the prompt on stdin and capture the reply; the CLI's stderr is dropped.
# Any failure aborts the commit and points at the bypass switch.
if ! RESULT="$(echo "$PROMPT" | claude --model "$MODEL" --print 2> /dev/null)"; then
  echo -e "${RED}Error: Claude evaluation failed${NC}"
  echo "You can bypass with: CLAUDE_SKIP_EVAL=1 git commit"
  exit 1
fi
|
||||
|
||||
#######################################
# Extract the JSON object from a model reply (handles markdown wrapping).
# The reply is flattened to one line first, then everything from the first
# "{" through the last "}" is taken. A per-line match would only see one line
# of a pretty-printed object and return the nested {"correctness": ...} map
# instead of the full evaluation. Replacing line breaks with spaces is safe
# because raw newlines cannot occur inside JSON string values.
# Arguments: $1 - raw reply text
# Outputs:   the extracted text on stdout, or nothing when no braces are found
# Returns:   0 always (so a miss does not trip `set -e`)
#######################################
extract_json() {
  local flat
  local object_re='\{.*\}'
  flat=$(printf '%s' "$1" | tr '\r\n' '  ')
  if [[ "$flat" =~ $object_re ]]; then
    printf '%s\n' "${BASH_REMATCH[0]}"
  fi
  return 0
}

# Extract JSON from response (handle potential markdown wrapping)
JSON_RESULT=$(extract_json "$RESULT")
|
||||
|
||||
# A missing, malformed, or non-object result is treated as inconclusive and
# the commit proceeds. Without the jq validity check, the field extractions
# below would fail on bad input and `set -e` would block the commit with only
# a jq error message.
if [[ -z "$JSON_RESULT" ]] \
  || ! jq -e 'type == "object"' <<< "$JSON_RESULT" > /dev/null 2>&1; then
  echo -e "${YELLOW}Warning: Could not parse evaluation result${NC}"
  echo "Raw response: $RESULT"
  echo ""
  echo "Proceeding with commit (evaluation inconclusive)"
  exit 0
fi

# Parse result. The "?" suffix makes a wrongly-typed "scores" value fall back
# to the default instead of raising a jq error.
VERDICT=$(jq -r '.verdict // "UNKNOWN"' <<< "$JSON_RESULT")
CORRECTNESS=$(jq -r '.scores.correctness? // 0' <<< "$JSON_RESULT")
COMPLETENESS=$(jq -r '.scores.completeness? // 0' <<< "$JSON_RESULT")
SAFETY=$(jq -r '.scores.safety? // 0' <<< "$JSON_RESULT")
SUMMARY=$(jq -r '.summary // "No summary"' <<< "$JSON_RESULT")
# Always a JSON array, even when the model returned something else for
# "issues", so later jq calls on ISSUES have valid input.
ISSUES=$(jq -c 'if (.issues | type) == "array" then .issues else [] end' <<< "$JSON_RESULT")
|
||||
|
||||
# Display results
echo ""
echo -e "${CYAN}═══════════════════════════════════════════════════════════${NC}"
echo -e "${CYAN} Evaluation Results${NC}"
echo -e "${CYAN}═══════════════════════════════════════════════════════════${NC}"
echo ""
echo " Correctness: $CORRECTNESS/10"
echo " Completeness: $COMPLETENESS/10"
echo " Safety: $SAFETY/10"
echo ""
echo " Summary: $SUMMARY"
echo ""

# Show issues if any. Both jq calls are defensive: a non-array ISSUES value
# counts as zero issues, and an entry without a string "severity" or
# "description" gets a placeholder. Otherwise `ascii_upcase` on null raises a
# jq error and `set -e` aborts the hook before the verdict is handled.
ISSUE_COUNT=$(echo "$ISSUES" | jq 'if type == "array" then length else 0 end' 2> /dev/null || echo 0)
if [[ "$ISSUE_COUNT" -gt 0 ]]; then
  echo " Issues found:"
  echo "$ISSUES" \
    | jq -r '.[] | " [\((.severity? // "unknown") | tostring | ascii_upcase)] \((.description? // "(no description)") | tostring)"' \
    || true
  echo ""
fi
|
||||
|
||||
# Turn the verdict into the hook's exit status: 0 lets the commit through,
# 1 stops it. An unrecognized verdict is treated as inconclusive.
if [[ "$VERDICT" == "APPROVE" ]]; then
  echo -e "${GREEN}✓ APPROVED - Proceeding with commit${NC}"
  echo ""
  exit 0
elif [[ "$VERDICT" == "NEEDS_REVIEW" ]]; then
  echo -e "${YELLOW}⚠ NEEDS_REVIEW - Issues detected${NC}"
  echo ""
  echo "Options:"
  echo " 1. Fix issues and try again"
  echo " 2. Bypass: CLAUDE_SKIP_EVAL=1 git commit"
  echo " 3. Skip hook: git commit --no-verify"
  echo ""
  exit 1
elif [[ "$VERDICT" == "REJECT" ]]; then
  echo -e "${RED}✗ REJECTED - Critical issues found${NC}"
  echo ""
  echo "Please fix the issues before committing."
  echo "To force commit anyway: git commit --no-verify"
  echo ""
  exit 1
else
  echo -e "${YELLOW}? Unknown verdict: $VERDICT${NC}"
  echo "Proceeding with commit (evaluation inconclusive)"
  exit 0
fi
|
||||
182
examples/hooks/bash/prompt-injection-detector.sh
Executable file
182
examples/hooks/bash/prompt-injection-detector.sh
Executable file
|
|
@ -0,0 +1,182 @@
|
|||
#!/bin/bash
|
||||
# Hook: PreToolUse - Detect prompt injection attempts
|
||||
# Exit 0 = allow, Exit 2 = block (stderr message shown to Claude)
|
||||
#
|
||||
# This hook detects common prompt injection patterns that attempt to
|
||||
# manipulate Claude's behavior through malicious instructions.
|
||||
#
|
||||
# Place in: .claude/hooks/prompt-injection-detector.sh
|
||||
# Register in: .claude/settings.json under PreToolUse event
|
||||
|
||||
set -e
|
||||
|
||||
# Hook payload (JSON) arrives on stdin.
INPUT="$(cat)"

TOOL_NAME="$(echo "$INPUT" | jq -r '.tool_name // empty')"
TOOL_INPUT="$(echo "$INPUT" | jq -r '.tool_input // empty')"

# Pick the jq filter that yields the text to analyze for this tool. Tools
# outside this list are allowed straight away.
case "$TOOL_NAME" in
  Bash)
    content_filter='.command // empty'
    ;;
  Write | Edit)
    content_filter='.content // .new_string // empty'
    ;;
  WebFetch)
    content_filter='.url // empty'
    ;;
  *)
    exit 0
    ;;
esac

CONTENT="$(echo "$TOOL_INPUT" | jq -r "$content_filter")"

# Nothing to analyze.
if [[ -z "$CONTENT" ]]; then
  exit 0
fi
|
||||
|
||||
# Lowercased copy of the content, used for case-insensitive phrase matching.
CONTENT_LOWER="$(echo "$CONTENT" | tr '[:upper:]' '[:lower:]')"

# Block the tool call (message on stderr, exit status 2) as soon as
# CONTENT_LOWER contains one of the given phrases.
#   $1     - category text used in the message
#   $2...  - lowercase phrases, compared as literal substrings
block_on_phrase() {
  local category=$1 phrase
  shift
  for phrase in "$@"; do
    if [[ "$CONTENT_LOWER" == *"$phrase"* ]]; then
      echo "BLOCKED: Prompt injection detected - ${category}: '$phrase'" >&2
      exit 2
    fi
  done
}

# === ROLE OVERRIDE PATTERNS ===
# Attempts to replace the assistant's instructions or identity.
ROLE_OVERRIDE_PATTERNS=(
  'ignore previous instructions'
  'ignore all previous'
  'ignore your instructions'
  'disregard previous'
  'disregard your instructions'
  'forget your instructions'
  'forget everything'
  'you are now'
  'act as if'
  'pretend you are'
  'pretend to be'
  'from now on you'
  'new instructions:'
  'override:'
  'system prompt:'
)
block_on_phrase "role override attempt" "${ROLE_OVERRIDE_PATTERNS[@]}"

# === JAILBREAK PATTERNS ===
# Known jailbreak vocabulary and bypass attempts.
JAILBREAK_PATTERNS=(
  'dan mode'
  'developer mode'
  'jailbreak'
  'do anything now'
  'bypass'
  'no restrictions'
  'without restrictions'
  'unrestricted mode'
  'god mode'
  'sudo mode'
  'admin mode'
  'debug mode'
  'maintenance mode'
)
block_on_phrase "jailbreak attempt" "${JAILBREAK_PATTERNS[@]}"
|
||||
|
||||
# === DELIMITER INJECTION ===
# Attempts to inject fake system messages or close prompt sections.
# Every entry is single-quoted: inside double quotes a backtick starts a
# command substitution, so the fenced-"system" marker written with double
# quotes left an unterminated substitution. Bash then aborted with a syntax
# error and exit status 2, which this hook's caller treats as "block".
DELIMITER_PATTERNS=(
  '</system>'
  '<|endoftext|>'
  '<|im_end|>'
  '[/INST]'
  '[INST]'
  '<<SYS>>'
  '<</SYS>>'
  '### System:'
  '### Human:'
  '### Assistant:'
  '```system'
  'SYSTEM:'
)

# Delimiters are compared case-sensitively against the original content, as
# literal substrings.
for pattern in "${DELIMITER_PATTERNS[@]}"; do
  if [[ "$CONTENT" == *"$pattern"* ]]; then
    echo "BLOCKED: Prompt injection detected - delimiter injection: '$pattern'" >&2
    exit 2
  fi
done
|
||||
|
||||
# === AUTHORITY IMPERSONATION ===
# Claims of special authority or permissions. Phrases are lowercase and are
# looked up as literal substrings of the lowercased content.
AUTHORITY_PATTERNS=(
  'anthropic employee'
  'anthropic staff'
  'i am your creator'
  'i am your developer'
  'i have admin access'
  'authorized to bypass'
  'emergency override'
  'security exception'
  'this is a test'
  'testing mode'
)

for pattern in "${AUTHORITY_PATTERNS[@]}"; do
  case "$CONTENT_LOWER" in
    *"$pattern"*)
      # First hit blocks the tool call: message on stderr, exit status 2.
      echo "BLOCKED: Prompt injection detected - authority impersonation: '$pattern'" >&2
      exit 2
      ;;
  esac
done
|
||||
|
||||
# === BASE64 ENCODED INSTRUCTIONS ===

#######################################
# Look for injection keywords hidden in base64-looking runs (heuristic).
# Every run of 50+ base64 characters is decoded and checked, not only the
# first one, so a harmless leading blob cannot shield a later payload. NUL
# bytes are removed from the decoded data because bash cannot hold them in a
# variable and warns about them on stderr.
# Arguments: $1 - text to inspect
# Outputs:   the first keyword found, on stdout
# Returns:   0 when a keyword was found, 1 otherwise
#######################################
find_encoded_injection() {
  local candidate decoded keyword
  while IFS= read -r candidate; do
    [[ -n "$candidate" ]] || continue
    decoded=$(
      printf '%s' "$candidate" \
        | base64 -d 2> /dev/null \
        | LC_ALL=C tr -d '\0' \
        | LC_ALL=C tr '[:upper:]' '[:lower:]' || true
    )
    for keyword in "ignore" "override" "system" "jailbreak" "dan mode"; do
      if [[ "$decoded" == *"$keyword"* ]]; then
        printf '%s\n' "$keyword"
        return 0
      fi
    done
  done < <(grep -oE '[A-Za-z0-9+/]{50,}={0,2}' <<< "$1" || true)
  return 1
}

# Detect potential base64-encoded payloads and block when one decodes to text
# containing an injection keyword.
if encoded_keyword=$(find_encoded_injection "$CONTENT"); then
  echo "BLOCKED: Prompt injection detected - encoded payload containing: '$encoded_keyword'" >&2
  exit 2
fi
|
||||
|
||||
# === CONTEXT MANIPULATION ===
# Attempts to manipulate the conversation context
CONTEXT_PATTERNS=(
  "in the previous message"
  "as i mentioned earlier"
  "you agreed to"
  "you already said"
  "you promised"
  "remember when you"
  "our agreement was"
)

#######################################
# Emit a single advisory systemMessage when the text contains any context
# manipulation phrase. The message is printed at most once: one JSON object
# per matching phrase would put several objects on stdout.
# Globals:   CONTEXT_PATTERNS (read)
# Arguments: $1 - lowercased text to inspect
# Outputs:   one JSON object on stdout, or nothing
# Returns:   0 always (warning only - these phrases can be legitimate)
#######################################
warn_on_context_manipulation() {
  local text=$1 phrase
  for phrase in "${CONTEXT_PATTERNS[@]}"; do
    if [[ "$text" == *"$phrase"* ]]; then
      printf '%s\n' '{"systemMessage": "Warning: Detected potential context manipulation pattern. Verify legitimacy."}'
      return 0
    fi
  done
  return 0
}

warn_on_context_manipulation "$CONTENT_LOWER"
|
||||
|
||||
# Allow by default
|
||||
exit 0
|
||||
102
examples/hooks/bash/session-logger.sh
Executable file
102
examples/hooks/bash/session-logger.sh
Executable file
|
|
@ -0,0 +1,102 @@
|
|||
#!/bin/bash
|
||||
# Hook: PostToolUse - Log all Claude Code operations for monitoring
|
||||
# Exit 0 = allow (always)
|
||||
#
|
||||
# This hook logs all tool operations to JSONL files for later analysis.
|
||||
# Use session-stats.sh to analyze the logs.
|
||||
#
|
||||
# Logs are stored in: ~/.claude/logs/activity-YYYY-MM-DD.jsonl
|
||||
#
|
||||
# Environment variables:
|
||||
# CLAUDE_LOG_DIR - Override log directory (default: ~/.claude/logs)
|
||||
# CLAUDE_LOG_TOKENS - Enable token estimation (default: true)
|
||||
# CLAUDE_SESSION_ID - Session identifier (auto-generated if not set)
|
||||
#
|
||||
# Place in: .claude/hooks/session-logger.sh
|
||||
# Register in: .claude/settings.json under PostToolUse event
|
||||
|
||||
set -e
|
||||
|
||||
# Configuration
LOG_DIR="${CLAUDE_LOG_DIR:-$HOME/.claude/logs}"
ENABLE_TOKENS="${CLAUDE_LOG_TOKENS:-true}"

# Ensure log directory exists
mkdir -p "$LOG_DIR"

# Log file for today
LOG_FILE="$LOG_DIR/activity-$(date +%Y-%m-%d).jsonl"

# Read JSON from stdin
INPUT=$(cat)

# Extract tool information
TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // "unknown"')
TOOL_INPUT=$(echo "$INPUT" | jq -c '.tool_input // {}')
TOOL_OUTPUT=$(echo "$INPUT" | jq -r '.tool_output // ""')

# Session identifier, in order of preference:
#   1. CLAUDE_SESSION_ID from the environment
#   2. the payload's `session_id` field, when present
#      (NOTE(review): field name assumed from the hook input format - confirm)
#   3. "<epoch>-<pid>" as a last resort
# The hook runs as a new process for every tool call, so the pid-based value
# alone differs on each invocation and cannot group entries by session.
SESSION_ID="${CLAUDE_SESSION_ID:-}"
if [[ -z "$SESSION_ID" ]]; then
  SESSION_ID=$(echo "$INPUT" | jq -r '.session_id // empty')
fi
if [[ -z "$SESSION_ID" ]]; then
  SESSION_ID="$(date +%s)-$$"
fi

# Get timestamp (UTC, ISO 8601)
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
|
||||
|
||||
# Tool-specific detail recorded with the entry: a path or pattern for the
# file-oriented tools, the command text for Bash. Both stay empty for any
# other tool.
FILE_PATH=""
COMMAND=""

if [[ "$TOOL_NAME" == "Bash" ]]; then
  # Only the first 200 bytes of the command are kept.
  COMMAND="$(echo "$TOOL_INPUT" | jq -r '.command // ""' | head -c 200)"
elif [[ "$TOOL_NAME" == "Read" || "$TOOL_NAME" == "Write" || "$TOOL_NAME" == "Edit" ]]; then
  FILE_PATH="$(echo "$TOOL_INPUT" | jq -r '.file_path // .path // ""')"
elif [[ "$TOOL_NAME" == "Grep" || "$TOOL_NAME" == "Glob" ]]; then
  FILE_PATH="$(echo "$TOOL_INPUT" | jq -r '.path // .pattern // ""')"
fi
|
||||
|
||||
# Rough token estimate: one token per four characters, rounded down. Both
# figures stay at zero when estimation is switched off.
TOKENS_INPUT=0
TOKENS_OUTPUT=0

if [[ "$ENABLE_TOKENS" == "true" ]]; then
  INPUT_LEN="${#TOOL_INPUT}"
  OUTPUT_LEN="${#TOOL_OUTPUT}"
  TOKENS_INPUT="$((INPUT_LEN / 4))"
  TOKENS_OUTPUT="$((OUTPUT_LEN / 4))"
fi

# Project directory: CLAUDE_PROJECT_DIR when set, else the current working
# directory. Only its last path component is logged.
PROJECT_DIR="${CLAUDE_PROJECT_DIR:-$(pwd)}"
PROJECT_NAME="$(basename "$PROJECT_DIR")"
|
||||
|
||||
# Build log entry.
# -c (compact output) keeps the object on one line. The log is a JSONL file,
# which needs exactly one JSON value per line; jq's default pretty-printing
# spreads each entry over many lines.
# Empty file/command values become null and are then dropped from the entry.
LOG_ENTRY=$(jq -cn \
  --arg timestamp "$TIMESTAMP" \
  --arg session_id "$SESSION_ID" \
  --arg tool "$TOOL_NAME" \
  --arg file "$FILE_PATH" \
  --arg command "$COMMAND" \
  --arg project "$PROJECT_NAME" \
  --argjson tokens_in "$TOKENS_INPUT" \
  --argjson tokens_out "$TOKENS_OUTPUT" \
  '{
    timestamp: $timestamp,
    session_id: $session_id,
    tool: $tool,
    file: (if $file != "" then $file else null end),
    command: (if $command != "" then $command else null end),
    project: $project,
    tokens: {
      input: $tokens_in,
      output: $tokens_out,
      total: ($tokens_in + $tokens_out)
    }
  } | with_entries(select(.value != null))'
)

# Append to log file
printf '%s\n' "$LOG_ENTRY" >> "$LOG_FILE"
|
||||
|
||||
# Always allow
|
||||
exit 0
|
||||
Loading…
Add table
Add a link
Reference in a new issue