feat(docs): add LLM Handbook + Google Whitepaper integration v3.3.0

Advanced Guardrails:
- prompt-injection-detector.sh (PreToolUse)
- output-validator.sh (PostToolUse heuristics)
- claudemd-scanner.sh (SessionStart injection detection)
- output-secrets-scanner.sh (PostToolUse secrets leak prevention)

Observability & Monitoring:
- session-logger.sh (JSONL activity logging)
- session-stats.sh (cost tracking & analysis)
- guide/observability.md (full documentation)

LLM-as-a-Judge Evaluation:
- output-evaluator.md agent (Haiku)
- /validate-changes command
- pre-commit-evaluator.sh (opt-in git hook)

Google Agent Whitepaper Integration:
- Context Triage Guide (Section 2.2.4)
- CLAUDE.md Injection Warning (Section 3.1.3)
- Agent Validation Checklist (Section 4.2.4)
- MCP Security: Tool Shadowing & Confused Deputy (Section 8.6)
- Session vs Memory patterns (Section 3.3.3)

Stats: 10 new files, 8 modified, 5 new guide sections

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Florian BRUNIAUX 2026-01-14 21:00:49 +01:00
parent 19110eba22
commit 8a4d116e2e
17 changed files with 2188 additions and 3 deletions

View file

@ -0,0 +1,98 @@
#!/bin/bash
# =============================================================================
# CLAUDE.md Injection Scanner Hook
# =============================================================================
# Event: SessionStart (runs when Claude Code session begins)
# Purpose: Detect potential prompt injection attacks in CLAUDE.md files
#
# Installation:
# Add to .claude/settings.json:
# {
# "hooks": {
# "SessionStart": [{
# "matcher": "",
# "hooks": ["bash examples/hooks/bash/claudemd-scanner.sh"]
# }]
# }
# }
#
# What it detects:
# - "ignore previous instructions" patterns (common injection technique)
# - Shell command execution attempts (curl|bash, wget|sh, eval)
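# - References to base64 decoding, e.g. "base64 --decode" (potential obfuscation)
# - Very long lines (over 500 characters) and non-ASCII text near sensitive keywords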
# - Suspicious HTML comments that might hide instructions
# =============================================================================
set -euo pipefail

# Extended regular expressions, matched case-insensitively with `grep -iE`,
# that flag likely prompt-injection or remote-code-execution content.
#
# Every pattern is single-quoted so backslashes and dollar signs reach grep
# untouched. Inside double quotes "\$\(" collapses to "$\(", where the leading
# "$" is an end-of-line anchor and the pattern can never match.
# [[:space:]] replaces "\s", which is a GNU extension.
SUSPICIOUS_PATTERNS=(
  'ignore.*previous.*instruction'
  'ignore.*all.*instruction'
  'disregard.*instruction'
  'forget.*instruction'
  'new.*instruction.*follow'
  'curl.*\|.*bash'
  'curl.*\|.*sh'
  'wget.*\|.*bash'
  'wget.*\|.*sh'
  'eval[[:space:]]*\('
  'base64.*decode'
  '\$\(.*curl'
  '\$\(.*wget'
  '<!--.*ignore'
  '<!--.*instruction'
)

# Human-readable findings; stays empty when nothing suspicious is found.
WARNINGS=()
#######################################
# Scan one file for prompt-injection indicators.
# Globals:
#   SUSPICIOUS_PATTERNS (read)  - extended regexes, matched case-insensitively
#   WARNINGS            (write) - one entry appended per finding
# Arguments:
#   $1 - path of the file to scan; anything that is not a regular file is
#        skipped silently
# Returns:
#   0 always; findings are reported only through WARNINGS
#######################################
scan_file() {
  local file="$1"
  local pattern non_ascii_lines

  if [[ ! -f "$file" ]]; then
    return 0
  fi

  for pattern in "${SUSPICIOUS_PATTERNS[@]}"; do
    # -e keeps a pattern that starts with "-" from being read as an option.
    if grep -qiE -e "$pattern" -- "$file" 2>/dev/null; then
      WARNINGS+=("Suspicious pattern in $file: matches '$pattern'")
    fi
  done

  # Check for very long single lines (potential obfuscation).
  # awk reports through its exit status rather than piping into `grep -q`:
  # with `set -o pipefail` an early-exiting grep can SIGPIPE the producer on
  # large inputs, which turns a positive finding into a false negative.
  if awk 'length > 500 { found = 1; exit } END { exit !found }' "$file"; then
    WARNINGS+=("Warning: $file contains very long lines (potential obfuscation)")
  fi

  # Check for non-ASCII characters on lines that also contain sensitive
  # keywords (potential homoglyph attack). The lines are captured first so no
  # producer is left writing into a closed pipe. `grep -P` errors are
  # discarded, so where -P is unsupported this check is skipped.
  non_ascii_lines=$(grep -P '[^\x00-\x7F]' "$file" 2>/dev/null || true)
  if [[ -n "$non_ascii_lines" ]] \
    && grep -qiE "instruction|ignore|run|execute" <<<"$non_ascii_lines"; then
    WARNINGS+=("Warning: $file contains non-ASCII characters near sensitive keywords")
  fi
}
# Scan all potential CLAUDE.md locations
scan_file "CLAUDE.md"
scan_file ".claude/CLAUDE.md"

# Also scan any other .md files in .claude/ that might be loaded
if [[ -d ".claude" ]]; then
  for md_file in .claude/*.md; do
    # An unmatched glob stays literal, hence the -f test.
    # .claude/CLAUDE.md was scanned above; skipping it avoids duplicate warnings.
    if [[ -f "$md_file" && "$md_file" != ".claude/CLAUDE.md" ]]; then
      scan_file "$md_file"
    fi
  done
fi

# Escape a string for use inside a JSON string literal.
# Arguments: $1 - raw text
# Outputs:   the text with backslash, double quote, newline, carriage return
#            and tab escaped, written to stdout without a trailing newline
json_escape() {
  local text=$1
  text=${text//\\/\\\\}
  text=${text//\"/\\\"}
  text=${text//$'\n'/\\n}
  text=${text//$'\r'/\\r}
  text=${text//$'\t'/\\t}
  printf '%s' "$text"
}

# Output warnings if any found
if [[ ${#WARNINGS[@]} -gt 0 ]]; then
  # Build the JSON response. Each warning is escaped first because it quotes
  # the matching regex (containing sequences such as \| and \s) and the file
  # name; inserted raw, these produce invalid JSON escapes.
  WARNING_TEXT="SECURITY WARNING - Suspicious content detected:\\n"
  for warning in "${WARNINGS[@]}"; do
    WARNING_TEXT+="- $(json_escape "$warning")\\n"
  done
  WARNING_TEXT+="\\nReview these files before proceeding. See: https://github.com/FlorianBruniaux/claude-code-ultimate-guide/guide/ultimate-guide.md#security-warning-claudemd-injection"
  printf '{"systemMessage": "%s"}\n' "$WARNING_TEXT"
fi

# Always exit 0 to not block session (just warn)
exit 0

View file

@ -0,0 +1,97 @@
#!/bin/bash
# =============================================================================
# Output Secrets Scanner Hook
# =============================================================================
# Event: PostToolUse (runs after each tool execution)
# Purpose: Detect secrets that might leak in tool outputs
#
# This complements security-check.sh (which scans inputs). This hook scans
# outputs to catch secrets that Claude might inadvertently expose.
#
# Installation:
# Add to .claude/settings.json:
# {
# "hooks": {
# "PostToolUse": [{
# "matcher": "",
# "hooks": ["bash examples/hooks/bash/output-secrets-scanner.sh"]
# }]
# }
# }
#
# What it detects:
# - API keys (OpenAI, Anthropic, AWS, GCP, Azure, Stripe, etc.)
# - Private keys and certificates
# - Database connection strings with passwords
# - GitHub/GitLab tokens
# - JWT tokens
# =============================================================================
set -euo pipefail

# Read the hook payload (JSON) from stdin
INPUT=$(cat)

# Extract the tool output as text. ".tool_response" is the field the Claude
# Code hooks reference documents for PostToolUse payloads; ".tool_output" and
# ".output" are still honoured for payloads that use those names. A non-string
# value is emitted as JSON text, which the regex scan handles equally well.
# If jq is missing or the payload is not valid JSON, fall back to empty.
TOOL_OUTPUT=$(jq -r '.tool_output // .tool_response // .output // ""' <<<"$INPUT" 2>/dev/null || echo "")

# Nothing to scan: exit cleanly
if [[ -z "$TOOL_OUTPUT" ]]; then
  exit 0
fi
# Secret patterns to detect.
# Keys are the labels shown in the warning; values are extended regular
# expressions that the detection loop matches case-insensitively (grep -iE).
#
# [[:space:]] is used instead of "\s": inside a bracket expression "\s" means
# a literal backslash and the letter "s", so "[^\s'\"]" rejected any secret
# containing an "s" while still accepting whitespace.
declare -A SECRET_PATTERNS=(
  # API Keys
  # OpenAI: legacy "sk-<alnum>" keys and "sk-proj-..." project keys
  ["OpenAI API Key"]="sk-(proj-[a-zA-Z0-9_-]{20,}|[a-zA-Z0-9]{20,})"
  # Anthropic keys contain further hyphens/underscores after the "sk-ant-" prefix
  ["Anthropic API Key"]="sk-ant-[a-zA-Z0-9_-]{20,}"
  ["AWS Access Key"]="AKIA[0-9A-Z]{16}"
  ["AWS Secret Key"]="[0-9a-zA-Z/+]{40}"
  ["GCP API Key"]="AIza[0-9A-Za-z_-]{35}"
  ["Azure Key"]="[a-zA-Z0-9]{32,}"
  ["Stripe Key"]="(sk|pk)_(live|test)_[0-9a-zA-Z]{24,}"
  ["Twilio Key"]="SK[a-f0-9]{32}"
  ["SendGrid Key"]="SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43}"
  # Tokens
  ["GitHub Token"]="(ghp|gho|ghu|ghs|ghr)_[a-zA-Z0-9]{36,}"
  ["GitLab Token"]="glpat-[a-zA-Z0-9_-]{20,}"
  ["NPM Token"]="npm_[a-zA-Z0-9]{36}"
  ["PyPI Token"]="pypi-[a-zA-Z0-9_-]{50,}"
  ["JWT Token"]="eyJ[a-zA-Z0-9_-]*\.eyJ[a-zA-Z0-9_-]*\.[a-zA-Z0-9_-]*"
  # Private Keys
  ["Private Key"]="-----BEGIN (RSA |EC |DSA |OPENSSH )?PRIVATE KEY-----"
  ["PGP Private Key"]="-----BEGIN PGP PRIVATE KEY BLOCK-----"
  # Database
  ["Database URL with Password"]="(postgres|mysql|mongodb)://[^:]+:[^@]+@"
  ["Redis URL with Password"]="redis://:[^@]+@"
  # Generic
  ["Generic API Key"]="(api[_-]?key|apikey|api[_-]?secret)['\"]?[[:space:]]*[:=][[:space:]]*['\"]?[a-zA-Z0-9_-]{20,}"
  ["Generic Secret"]="(secret|password|passwd|pwd)['\"]?[[:space:]]*[:=][[:space:]]*['\"]?[^[:space:]'\"]{8,}"
)
# Labels of every secret type whose pattern matched the tool output.
DETECTED_SECRETS=()

#######################################
# Match TOOL_OUTPUT against every entry of SECRET_PATTERNS.
# Globals:
#   SECRET_PATTERNS  (read)  - label -> extended regex
#   TOOL_OUTPUT      (read)  - text to scan
#   DETECTED_SECRETS (write) - reset, then filled with matching labels
# Arguments: none
# Returns:   0 always
#######################################
detect_secrets() {
  local secret_type
  DETECTED_SECRETS=()
  for secret_type in "${!SECRET_PATTERNS[@]}"; do
    # -e: patterns such as "-----BEGIN ..." start with "-" and would otherwise
    # be parsed as grep options; that error was hidden by 2>/dev/null, so
    # private keys were never reported.
    # Here-string instead of `echo | grep -q`: under `set -o pipefail` an
    # early-exiting grep can SIGPIPE echo on large outputs and make a real
    # match look like a failure.
    if grep -qiE -e "${SECRET_PATTERNS[$secret_type]}" <<<"$TOOL_OUTPUT" 2>/dev/null; then
      DETECTED_SECRETS+=("$secret_type")
    fi
  done
}

# Check each pattern
detect_secrets
# Report every detected secret type in a single systemMessage warning.
if (( ${#DETECTED_SECRETS[@]} > 0 )); then
  # Join the labels with ", ".
  SECRETS_LIST=""
  for detected in "${DETECTED_SECRETS[@]}"; do
    SECRETS_LIST+="${SECRETS_LIST:+, }$detected"
  done

  WARNING_MSG="SECRET LEAK WARNING: Potential secrets detected in output: $SECRETS_LIST. Do NOT commit or share this output. Consider using environment variables or a secrets manager."
  printf '%s\n' "{\"systemMessage\": \"$WARNING_MSG\"}"
fi

# This hook only warns; it never blocks the tool call.
exit 0

View file

@ -0,0 +1,184 @@
#!/bin/bash
# Hook: PostToolUse - Validate Claude's outputs for quality issues
# Exit 0 = allow (always), but emit systemMessage warnings
#
# This hook performs heuristic validation of Claude's outputs to detect:
# - Potential hallucinations (fabricated paths, functions)
# - Sensitive data leakage in outputs
# - High uncertainty indicators
#
# This is a lightweight heuristic check, not a full LLM evaluation.
# For deeper validation, use the output-evaluator agent.
#
# Place in: .claude/hooks/output-validator.sh
# Register in: .claude/settings.json under PostToolUse event
set -e

# Read the hook payload (JSON) from stdin
INPUT=$(cat)
TOOL_NAME=$(jq -r '.tool_name // empty' <<<"$INPUT")
# ".tool_response" is the field the Claude Code hooks reference documents for
# PostToolUse payloads; ".tool_output" is still honoured when present.
# A non-string value is emitted as JSON text.
TOOL_OUTPUT=$(jq -r '.tool_output // .tool_response // empty' <<<"$INPUT")

# Only validate tools that produce code/content outputs
case "$TOOL_NAME" in
  Edit|Write|Bash)
    ;;
  *)
    exit 0
    ;;
esac

# Findings collected by the checks below
WARNINGS=()
# === FABRICATED FILE PATHS ===
# Template-style paths that usually mean the path was invented rather than
# taken from the real project. Each entry is matched as a literal substring.
SUSPICIOUS_PATHS=(
  '/path/to/'
  '/your/project/'
  '/example/'
  '/foo/bar/'
  '/my/app/'
  '/user/project/'
  'C:\Users\User\'
  'C:\path\to\'
)

for placeholder_path in "${SUSPICIOUS_PATHS[@]}"; do
  case "$TOOL_OUTPUT" in
    *"$placeholder_path"*)
      WARNINGS+=("Suspicious placeholder path detected: '$placeholder_path'")
      ;;
  esac
done
# === PLACEHOLDER CONTENT ===
# Literal markers (case-sensitive substrings) that should not survive into
# production code: work-in-progress tags, dummy credentials, sample addresses.
PLACEHOLDER_PATTERNS=(
  'TODO:'
  'FIXME:'
  'XXX:'
  'HACK:'
  'your-api-key'
  'your_api_key'
  'YOUR_API_KEY'
  'sk-...'
  'pk_test_'
  'pk_live_'
  'api_key_here'
  'replace_with'
  'insert_your'
  'placeholder'
  'example.com'
  'foo@bar.com'
  'test@test.com'
)

for marker in "${PLACEHOLDER_PATTERNS[@]}"; do
  # Skip markers that do not occur in the output.
  [[ "$TOOL_OUTPUT" == *"$marker"* ]] || continue
  WARNINGS+=("Placeholder content detected: '$marker'")
done
# === SENSITIVE DATA LEAKAGE ===
# Extended regular expressions for secrets that may have leaked into output.
SECRET_PATTERNS=(
  # AWS
  'AKIA[0-9A-Z]{16}'
  # Generic API keys (long hex strings)
  '[a-f0-9]{32,}'
  # Private keys
  '-----BEGIN.*PRIVATE KEY-----'
  '-----BEGIN RSA'
  '-----BEGIN EC'
  # JWT tokens
  'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.'
  # Password patterns. A real single quote is used: "\x27" is not an escape in
  # extended regular expressions, so single-quoted passwords were not matched.
  "password[\"']?[[:space:]]*[=:][[:space:]]*[\"'][^\"']{8,}"
)

#######################################
# Append one warning per SECRET_PATTERNS entry that matches the given text.
# Globals:   SECRET_PATTERNS (read), WARNINGS (write)
# Arguments: $1 - text to scan
# Returns:   0 always
#######################################
scan_output_for_secrets() {
  local text=$1
  local pattern
  for pattern in "${SECRET_PATTERNS[@]}"; do
    # -e: patterns starting with "-----BEGIN" would otherwise be parsed as
    # grep options; that error was hidden by 2>/dev/null, so they never matched.
    if grep -qE -e "$pattern" <<<"$text" 2>/dev/null; then
      WARNINGS+=("Potential sensitive data in output (pattern: ${pattern:0:20}...)")
    fi
  done
}

scan_output_for_secrets "$TOOL_OUTPUT"
# === UNCERTAINTY INDICATORS ===
# Phrases that suggest the output is a guess rather than verified fact.
UNCERTAINTY_PATTERNS=(
  "I'm not sure"
  "I think it might"
  "probably"
  "possibly"
  "might be"
  "could be"
  "I believe"
  "I assume"
  "I guess"
  "if I recall"
  "from memory"
  "I don't have access"
  "I cannot verify"
)

#######################################
# Count how many UNCERTAINTY_PATTERNS phrases occur in a text, ignoring case.
# Each phrase counts at most once.
# Globals:
#   UNCERTAINTY_PATTERNS (read)
#   TOOL_OUTPUT_LOWER    (write) - lower-cased copy of the text
#   UNCERTAINTY_COUNT    (write) - number of distinct phrases found
# Arguments: $1 - text to inspect
# Returns:   0 always
#######################################
count_uncertainty() {
  local phrase phrase_lower
  TOOL_OUTPUT_LOWER=$(printf '%s\n' "$1" | tr '[:upper:]' '[:lower:]')
  UNCERTAINTY_COUNT=0
  for phrase in "${UNCERTAINTY_PATTERNS[@]}"; do
    phrase_lower=$(printf '%s\n' "$phrase" | tr '[:upper:]' '[:lower:]')
    if [[ "$TOOL_OUTPUT_LOWER" == *"$phrase_lower"* ]]; then
      # Plain assignment on purpose: ((UNCERTAINTY_COUNT++)) evaluates to the
      # old value, so the very first hit (0) returns status 1 and `set -e`
      # would abort the whole hook.
      UNCERTAINTY_COUNT=$((UNCERTAINTY_COUNT + 1))
    fi
  done
}

count_uncertainty "$TOOL_OUTPUT"

if [[ $UNCERTAINTY_COUNT -ge 3 ]]; then
  WARNINGS+=("High uncertainty detected ($UNCERTAINTY_COUNT indicators) - verify output accuracy")
fi
# === INCOMPLETE IMPLEMENTATIONS ===
# Extended regular expressions (matched case-insensitively) that indicate
# stubbed or unfinished code.
INCOMPLETE_PATTERNS=(
  'not implemented'
  'NotImplementedError'
  'throw new Error.*implement'
  '// TODO'
  '# TODO'
  'pass # '
  'raise NotImplemented'
  'undefined'
)

for stub_marker in "${INCOMPLETE_PATTERNS[@]}"; do
  grep -qiE "$stub_marker" <<<"$TOOL_OUTPUT" 2>/dev/null || continue
  WARNINGS+=("Incomplete implementation detected: '$stub_marker'")
done
# === HALLUCINATION INDICATORS ===
# Appeals to an unnamed source; matched as case-sensitive literal substrings.
HALLUCINATION_PATTERNS=(
  'According to the documentation'
  'As stated in'
  'The official guide says'
  'Based on the API reference'
)

for claim in "${HALLUCINATION_PATTERNS[@]}"; do
  case "$TOOL_OUTPUT" in
    *"$claim"*)
      WARNINGS+=("Unverified reference claim: '$claim' - verify source")
      ;;
  esac
done
# === OUTPUT WARNINGS ===
if [[ ${#WARNINGS[@]} -gt 0 ]]; then
  # Assemble the message with real newlines and let jq do the JSON encoding.
  # Warnings quote patterns containing double quotes and backslashes
  # (e.g. C:\Users\User\), which produce invalid JSON when interpolated into
  # a hand-written string.
  WARNING_MSG="Output validation warnings:"$'\n'
  for warn in "${WARNINGS[@]}"; do
    WARNING_MSG+=" - $warn"$'\n'
  done
  WARNING_MSG+=$'\n'"Review output carefully before accepting."

  # Emit as systemMessage (warning, not blocking); -c keeps it on one line
  jq -cn --arg msg "$WARNING_MSG" '{systemMessage: $msg}'
fi

# Always allow (this hook warns, doesn't block)
exit 0

View file

@ -0,0 +1,207 @@
#!/bin/bash
# Git pre-commit hook: LLM-as-a-Judge evaluation before commit
#
# This hook uses Claude to evaluate staged changes before allowing a commit.
# It's an OPT-IN feature due to API costs and latency.
#
# COST WARNING: Each commit evaluation costs ~$0.01-0.05 (Haiku model)
#
# Installation:
# 1. Copy to your repo: cp pre-commit-evaluator.sh .git/hooks/pre-commit
# 2. Make executable: chmod +x .git/hooks/pre-commit
# 3. Set required env var: export CLAUDE_PRECOMMIT_EVAL=1
#
# Environment Variables:
# CLAUDE_PRECOMMIT_EVAL - Set to "1" to enable (default: disabled)
# CLAUDE_EVAL_MODEL - Model to use (default: haiku)
# CLAUDE_EVAL_THRESHOLD - Minimum score to pass (default: 7)
# CLAUDE_EVAL_SKIP_PATHS - Colon-separated paths to skip (e.g., "docs:*.md")
#
# Bypass for single commit:
# CLAUDE_SKIP_EVAL=1 git commit -m "message"
# or
# git commit --no-verify -m "message"
set -e

# Opt-in gate: do nothing unless evaluation was explicitly enabled.
[[ "${CLAUDE_PRECOMMIT_EVAL:-0}" == "1" ]] || exit 0

# Per-commit bypass.
case "${CLAUDE_SKIP_EVAL:-0}" in
  1)
    echo "Skipping LLM evaluation (CLAUDE_SKIP_EVAL=1)"
    exit 0
    ;;
esac

# Settings, each overridable through the environment.
MODEL=${CLAUDE_EVAL_MODEL:-haiku}
THRESHOLD=${CLAUDE_EVAL_THRESHOLD:-7}
SKIP_PATHS=${CLAUDE_EVAL_SKIP_PATHS:-}

# ANSI colour sequences; they are expanded later by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

# Nothing staged means nothing to evaluate.
STAGED_FILES=$(git diff --cached --name-only)
[[ -n "$STAGED_FILES" ]] || exit 0
#######################################
# Tell whether a path matches any skip pattern.
# A pattern matches when the path matches it as a shell glob, or when the
# pattern names a directory that contains the path ("docs" or "docs/" matches
# "docs/guide.md"). Empty patterns are ignored.
# Arguments: $1 - path to test; remaining arguments - skip patterns
# Returns:   0 if the path should be skipped, 1 otherwise
#######################################
path_is_skipped() {
  local candidate=$1
  local pattern
  shift
  for pattern in "$@"; do
    [[ -n "$pattern" ]] || continue
    # $pattern is unquoted on purpose so that it acts as a glob.
    if [[ "$candidate" == $pattern || "$candidate" == "${pattern%/}"/* ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Filter out skipped paths from STAGED_FILES.
# Paths are read one per line, so names containing spaces stay intact; the
# surviving paths are stored back newline-separated.
# Globals:
#   SKIP_PATHS   (read)  - colon-separated skip patterns; empty disables filtering
#   SKIP_ARRAY   (write) - SKIP_PATHS split into an array
#   STAGED_FILES (read/write)
# Returns: 0 always
#######################################
filter_staged_files() {
  local file kept="" nl=$'\n'
  [[ -n "$SKIP_PATHS" ]] || return 0
  IFS=':' read -ra SKIP_ARRAY <<< "$SKIP_PATHS"
  while IFS= read -r file; do
    [[ -n "$file" ]] || continue
    if ! path_is_skipped "$file" "${SKIP_ARRAY[@]}"; then
      kept+="${kept:+$nl}$file"
    fi
  done <<< "$STAGED_FILES"
  STAGED_FILES=$kept
}

filter_staged_files
# Every staged file was filtered out: nothing left to evaluate.
[[ -n "$STAGED_FILES" ]] || exit 0

# Announce what is about to be evaluated (word count of the file list).
FILE_COUNT=$(wc -w <<<"$STAGED_FILES" | tr -d ' ')
echo -e "${CYAN}Evaluating $FILE_COUNT staged file(s) with Claude ($MODEL)...${NC}"
echo -e "${YELLOW}Cost: ~\$0.01-0.05 per evaluation${NC}"
echo ""

# Collect the staged diff and cap its size to keep the evaluation cheap.
DIFF=$(git diff --cached)
MAX_CHARS=50000
if (( ${#DIFF} > MAX_CHARS )); then
  echo -e "${YELLOW}Warning: Diff truncated to ${MAX_CHARS} chars for cost control${NC}"
  DIFF="${DIFF:0:MAX_CHARS}"$'\n'"[TRUNCATED - diff exceeded ${MAX_CHARS} characters]"
fi
# Build the evaluation prompt: fixed instruction lines (each terminated by a
# newline) followed by the staged diff.
prompt_lines=(
  'You are a code quality evaluator. Analyze this git diff and provide a JSON evaluation.'
  'Score each criterion from 0-10:'
  '- correctness: Does the code work correctly?'
  '- completeness: Is the implementation complete (no TODOs, stubs)?'
  '- safety: No secrets, no security issues?'
  'Respond ONLY with valid JSON in this format:'
  '{'
  '"verdict": "APPROVE" or "NEEDS_REVIEW" or "REJECT",'
  '"scores": {"correctness": N, "completeness": N, "safety": N},'
  '"issues": [{"severity": "high/medium/low", "description": "..."}],'
  '"summary": "One sentence summary"'
  '}'
  'Rules:'
  "- APPROVE if all scores >= $THRESHOLD and no high-severity issues"
  "- NEEDS_REVIEW if any score is 5-$((THRESHOLD-1)) or medium issues exist"
  '- REJECT if any score < 5 or high-severity security issues'
  'Git diff to evaluate:'
)
printf -v PROMPT '%s\n' "${prompt_lines[@]}"
PROMPT+=$DIFF
# The evaluation needs the `claude` CLI to be installed and authenticated.
command -v claude > /dev/null 2>&1 || {
  echo -e "${RED}Error: 'claude' CLI not found. Install Claude Code first.${NC}"
  exit 1
}

# Send the prompt on stdin and capture the reply; CLI diagnostics are discarded.
if ! RESULT=$(claude --model "$MODEL" --print 2>/dev/null <<<"$PROMPT"); then
  echo -e "${RED}Error: Claude evaluation failed${NC}"
  echo "You can bypass with: CLAUDE_SKIP_EVAL=1 git commit"
  exit 1
fi
#######################################
# Extract the outermost JSON object from a model reply.
# Returns everything from the first "{" to the last "}", across lines, so
# markdown fences or prose around the object are dropped. A per-line
# `grep -o '{.*}'` would pick only the inner one-line "scores" object out of
# a multi-line reply, losing the verdict.
# Arguments: $1 - raw reply text
# Outputs:   the candidate JSON object on stdout; nothing when the text
#            contains no "{" followed by a "}"
# Returns:   0 always
#######################################
extract_json_object() {
  local text=$1
  local open='{' close='}'
  local candidate
  [[ "$text" == *"$open"*"$close"* ]] || return 0
  candidate=${text#*"$open"}       # drop everything up to the first "{"
  candidate=${candidate%"$close"*} # drop everything from the last "}"
  printf '%s\n' "${open}${candidate}${close}"
}

# Extract JSON from response (handle potential markdown wrapping)
JSON_RESULT=$(extract_json_object "$RESULT")
# Treat a missing or unparsable result as inconclusive and let the commit
# through. The validity check matters because the script runs under `set -e`:
# without it, a non-empty but malformed result would make the first jq call
# below fail and abort the hook with a raw jq error.
if [[ -z "$JSON_RESULT" ]] || ! jq empty <<<"$JSON_RESULT" > /dev/null 2>&1; then
  echo -e "${YELLOW}Warning: Could not parse evaluation result${NC}"
  echo "Raw response: $RESULT"
  echo ""
  echo "Proceeding with commit (evaluation inconclusive)"
  exit 0
fi

# Parse result; every field falls back to a neutral default when absent.
VERDICT=$(jq -r '.verdict // "UNKNOWN"' <<<"$JSON_RESULT")
CORRECTNESS=$(jq -r '.scores.correctness // 0' <<<"$JSON_RESULT")
COMPLETENESS=$(jq -r '.scores.completeness // 0' <<<"$JSON_RESULT")
SAFETY=$(jq -r '.scores.safety // 0' <<<"$JSON_RESULT")
SUMMARY=$(jq -r '.summary // "No summary"' <<<"$JSON_RESULT")
ISSUES=$(jq -r '.issues // []' <<<"$JSON_RESULT")
# Display results
echo ""
echo -e "${CYAN}═══════════════════════════════════════════════════════════${NC}"
echo -e "${CYAN} Evaluation Results${NC}"
echo -e "${CYAN}═══════════════════════════════════════════════════════════${NC}"
echo ""
echo " Correctness: $CORRECTNESS/10"
echo " Completeness: $COMPLETENESS/10"
echo " Safety: $SAFETY/10"
echo ""
echo " Summary: $SUMMARY"
echo ""

# Show issues if any
ISSUE_COUNT=$(echo "$ISSUES" | jq 'length')
if [[ "$ISSUE_COUNT" -gt 0 ]]; then
  echo " Issues found:"
  # Default a missing severity or description: `ascii_upcase` on null makes jq
  # fail, and under `set -e` that would abort the hook before the verdict is
  # handled.
  echo "$ISSUES" | jq -r '.[] | " [\(.severity // "unknown" | tostring | ascii_upcase)] \(.description // "(no description)")"'
  echo ""
fi
# Map the verdict to the hook's exit status: 0 lets the commit proceed,
# 1 stops it. Anything unrecognised is treated as inconclusive.
if [[ "$VERDICT" == "APPROVE" ]]; then
  echo -e "${GREEN}✓ APPROVED - Proceeding with commit${NC}"
  echo ""
  exit 0
elif [[ "$VERDICT" == "NEEDS_REVIEW" ]]; then
  echo -e "${YELLOW}⚠ NEEDS_REVIEW - Issues detected${NC}"
  cat <<'EOF'

Options:
 1. Fix issues and try again
 2. Bypass: CLAUDE_SKIP_EVAL=1 git commit
 3. Skip hook: git commit --no-verify

EOF
  exit 1
elif [[ "$VERDICT" == "REJECT" ]]; then
  echo -e "${RED}✗ REJECTED - Critical issues found${NC}"
  cat <<'EOF'

Please fix the issues before committing.
To force commit anyway: git commit --no-verify

EOF
  exit 1
else
  echo -e "${YELLOW}? Unknown verdict: $VERDICT${NC}"
  echo "Proceeding with commit (evaluation inconclusive)"
  exit 0
fi

View file

@ -0,0 +1,182 @@
#!/bin/bash
# Hook: PreToolUse - Detect prompt injection attempts
# Exit 0 = allow, Exit 2 = block (stderr message shown to Claude)
#
# This hook detects common prompt injection patterns that attempt to
# manipulate Claude's behavior through malicious instructions.
#
# Place in: .claude/hooks/prompt-injection-detector.sh
# Register in: .claude/settings.json under PreToolUse event
set -e

# Hook payload (JSON) arrives on stdin.
INPUT=$(cat)
TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty')
TOOL_INPUT=$(echo "$INPUT" | jq -r '.tool_input // empty')

# Pick the text to analyse for each supported tool; any other tool is allowed
# through without inspection.
CONTENT=""
case "$TOOL_NAME" in
  Bash)
    CONTENT=$(echo "$TOOL_INPUT" | jq -r '.command // empty')
    ;;
  Write|Edit)
    CONTENT=$(echo "$TOOL_INPUT" | jq -r '.content // .new_string // empty')
    ;;
  WebFetch)
    CONTENT=$(echo "$TOOL_INPUT" | jq -r '.url // empty')
    ;;
  *)
    exit 0
    ;;
esac

# Nothing to analyse.
if [[ -z "$CONTENT" ]]; then
  exit 0
fi

# Lower-cased copy used by the case-insensitive phrase checks.
CONTENT_LOWER=$(echo "$CONTENT" | tr '[:upper:]' '[:lower:]')
# === ROLE OVERRIDE PATTERNS ===
# Lower-case phrases that try to replace Claude's instructions or identity.
# They are compared against the lower-cased content as literal substrings.
ROLE_OVERRIDE_PATTERNS=(
  'ignore previous instructions'
  'ignore all previous'
  'ignore your instructions'
  'disregard previous'
  'disregard your instructions'
  'forget your instructions'
  'forget everything'
  'you are now'
  'act as if'
  'pretend you are'
  'pretend to be'
  'from now on you'
  'new instructions:'
  'override:'
  'system prompt:'
)

for phrase in "${ROLE_OVERRIDE_PATTERNS[@]}"; do
  case "$CONTENT_LOWER" in
    *"$phrase"*)
      echo "BLOCKED: Prompt injection detected - role override attempt: '$phrase'" >&2
      exit 2
      ;;
  esac
done
# === JAILBREAK PATTERNS ===
# Lower-case names of well-known jailbreak techniques and bypass requests,
# matched as literal substrings of the lower-cased content.
JAILBREAK_PATTERNS=(
  'dan mode'
  'developer mode'
  'jailbreak'
  'do anything now'
  'bypass'
  'no restrictions'
  'without restrictions'
  'unrestricted mode'
  'god mode'
  'sudo mode'
  'admin mode'
  'debug mode'
  'maintenance mode'
)

for technique in "${JAILBREAK_PATTERNS[@]}"; do
  [[ "$CONTENT_LOWER" == *"$technique"* ]] || continue
  echo "BLOCKED: Prompt injection detected - jailbreak attempt: '$technique'" >&2
  exit 2
done
# === DELIMITER INJECTION ===
# Chat-template delimiters used to fake system messages or close prompt
# sections. They are matched case-sensitively against the original content.
#
# The entries are single-quoted so that every character is literal. Inside
# double quotes the backticks of "```system" start command substitutions, and
# the unterminated third one is a syntax error: bash stops at this statement
# with status 2, which this hook defines as "block".
DELIMITER_PATTERNS=(
  '</system>'
  '<|endoftext|>'
  '<|im_end|>'
  '[/INST]'
  '[INST]'
  '<<SYS>>'
  '<</SYS>>'
  '### System:'
  '### Human:'
  '### Assistant:'
  '```system'
  'SYSTEM:'
)

for pattern in "${DELIMITER_PATTERNS[@]}"; do
  if [[ "$CONTENT" == *"$pattern"* ]]; then
    echo "BLOCKED: Prompt injection detected - delimiter injection: '$pattern'" >&2
    exit 2
  fi
done
# === AUTHORITY IMPERSONATION ===
# Lower-case phrases that claim special authority or permission, matched as
# literal substrings of the lower-cased content.
AUTHORITY_PATTERNS=(
  'anthropic employee'
  'anthropic staff'
  'i am your creator'
  'i am your developer'
  'i have admin access'
  'authorized to bypass'
  'emergency override'
  'security exception'
  'this is a test'
  'testing mode'
)

for claim in "${AUTHORITY_PATTERNS[@]}"; do
  if [[ "$CONTENT_LOWER" != *"$claim"* ]]; then
    continue
  fi
  echo "BLOCKED: Prompt injection detected - authority impersonation: '$claim'" >&2
  exit 2
done
# === BASE64 ENCODED INSTRUCTIONS ===
#######################################
# Look for injection keywords hidden in base64 (heuristic).
# Every run of 50 or more base64 characters is decoded and checked, not only
# the first one, so a harmless blob placed first cannot hide a later payload.
# Arguments: $1 - text to inspect
# Outputs:   the first keyword found in a decoded run, on stdout
# Returns:   0 if a keyword was found, 1 otherwise
#######################################
find_encoded_injection() {
  local text=$1
  local candidate decoded decoded_lower keyword
  while IFS= read -r candidate; do
    [[ -n "$candidate" ]] || continue
    # Decoding errors are expected for runs that are not really base64.
    # NUL bytes are dropped because shell variables cannot hold them.
    decoded=$(printf '%s' "$candidate" | base64 -d 2>/dev/null | tr -d '\0' || true)
    decoded_lower=$(printf '%s' "$decoded" | tr '[:upper:]' '[:lower:]')
    for keyword in "ignore" "override" "system" "jailbreak" "dan mode"; do
      if [[ "$decoded_lower" == *"$keyword"* ]]; then
        printf '%s\n' "$keyword"
        return 0
      fi
    done
  done < <(printf '%s\n' "$text" | grep -oE '[A-Za-z0-9+/]{50,}={0,2}' || true)
  return 1
}

if ENCODED_KEYWORD=$(find_encoded_injection "$CONTENT"); then
  echo "BLOCKED: Prompt injection detected - encoded payload containing: '$ENCODED_KEYWORD'" >&2
  exit 2
fi
# === CONTEXT MANIPULATION ===
# Lower-case phrases that try to rewrite the conversation history. They can
# be legitimate, so a match only produces a warning.
CONTEXT_PATTERNS=(
  "in the previous message"
  "as i mentioned earlier"
  "you agreed to"
  "you already said"
  "you promised"
  "remember when you"
  "our agreement was"
)

for pattern in "${CONTEXT_PATTERNS[@]}"; do
  if [[ "$CONTENT_LOWER" == *"$pattern"* ]]; then
    # Warning only - these could be legitimate.
    # Stop at the first hit: the message is identical for every phrase, and
    # printing it once per match would put several JSON objects on stdout.
    echo '{"systemMessage": "Warning: Detected potential context manipulation pattern. Verify legitimacy."}'
    break
  fi
done

# Allow by default
exit 0

View file

@ -0,0 +1,102 @@
#!/bin/bash
# Hook: PostToolUse - Log all Claude Code operations for monitoring
# Exit 0 = allow (always)
#
# This hook logs all tool operations to JSONL files for later analysis.
# Use session-stats.sh to analyze the logs.
#
# Logs are stored in: ~/.claude/logs/activity-YYYY-MM-DD.jsonl
#
# Environment variables:
# CLAUDE_LOG_DIR - Override log directory (default: ~/.claude/logs)
# CLAUDE_LOG_TOKENS - Enable token estimation (default: true)
# CLAUDE_SESSION_ID - Session identifier (auto-generated if not set)
#
# Place in: .claude/hooks/session-logger.sh
# Register in: .claude/settings.json under PostToolUse event
set -e

# Configuration
LOG_DIR="${CLAUDE_LOG_DIR:-$HOME/.claude/logs}"
ENABLE_TOKENS="${CLAUDE_LOG_TOKENS:-true}"

# Ensure log directory exists
mkdir -p "$LOG_DIR"

# Log file for today
LOG_FILE="$LOG_DIR/activity-$(date +%Y-%m-%d).jsonl"

# Read the hook payload (JSON) from stdin
INPUT=$(cat)

# Extract tool information. ".tool_response" is the field the Claude Code
# hooks reference documents for PostToolUse payloads; ".tool_output" is still
# honoured when present. A non-string value is emitted as JSON text.
TOOL_NAME=$(jq -r '.tool_name // "unknown"' <<<"$INPUT")
TOOL_INPUT=$(jq -c '.tool_input // {}' <<<"$INPUT")
TOOL_OUTPUT=$(jq -r '.tool_output // .tool_response // ""' <<<"$INPUT")

# Session identifier, in order of preference: CLAUDE_SESSION_ID, the
# "session_id" field of the payload, then a generated value. The generated
# value contains this process's PID and therefore differs on every hook
# invocation, so it cannot group the entries of one session.
SESSION_ID="${CLAUDE_SESSION_ID:-}"
if [[ -z "$SESSION_ID" ]]; then
  SESSION_ID=$(jq -r '.session_id // empty' <<<"$INPUT")
fi
if [[ -z "$SESSION_ID" ]]; then
  SESSION_ID="$(date +%s)-$$"
fi

# UTC timestamp for the entry
TIMESTAMP=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
# Pull out the one detail worth logging for each tool family: the target path
# for file tools, the command (first 200 bytes) for Bash, and the path or
# pattern for search tools. Other tools log neither.
FILE_PATH=""
COMMAND=""
if [[ "$TOOL_NAME" == "Read" || "$TOOL_NAME" == "Write" || "$TOOL_NAME" == "Edit" ]]; then
  FILE_PATH=$(jq -r '.file_path // .path // ""' <<<"$TOOL_INPUT")
elif [[ "$TOOL_NAME" == "Bash" ]]; then
  COMMAND=$(jq -r '.command // ""' <<<"$TOOL_INPUT" | head -c 200)
elif [[ "$TOOL_NAME" == "Grep" || "$TOOL_NAME" == "Glob" ]]; then
  FILE_PATH=$(jq -r '.path // .pattern // ""' <<<"$TOOL_INPUT")
fi
# Rough token estimate: one token per four characters of tool input/output.
# Both counts stay at zero unless estimation is enabled.
TOKENS_INPUT=0
TOKENS_OUTPUT=0
case "$ENABLE_TOKENS" in
  true)
    INPUT_LEN=${#TOOL_INPUT}
    OUTPUT_LEN=${#TOOL_OUTPUT}
    TOKENS_INPUT=$(( INPUT_LEN / 4 ))
    TOKENS_OUTPUT=$(( OUTPUT_LEN / 4 ))
    ;;
esac

# Project name: last component of CLAUDE_PROJECT_DIR, or of the current
# directory when that variable is not set.
PROJECT_DIR=${CLAUDE_PROJECT_DIR:-$(pwd)}
PROJECT_NAME=$(basename "$PROJECT_DIR")
# Build the log entry. -c makes jq print the object on a single line: the log
# is a .jsonl file (one JSON object per line), and jq's default pretty-printed
# output would spread every entry over several lines.
# Fields whose value is null (no file / no command) are dropped.
LOG_ENTRY=$(jq -cn \
  --arg timestamp "$TIMESTAMP" \
  --arg session_id "$SESSION_ID" \
  --arg tool "$TOOL_NAME" \
  --arg file "$FILE_PATH" \
  --arg command "$COMMAND" \
  --arg project "$PROJECT_NAME" \
  --argjson tokens_in "$TOKENS_INPUT" \
  --argjson tokens_out "$TOKENS_OUTPUT" \
  '{
    timestamp: $timestamp,
    session_id: $session_id,
    tool: $tool,
    file: (if $file != "" then $file else null end),
    command: (if $command != "" then $command else null end),
    project: $project,
    tokens: {
      input: $tokens_in,
      output: $tokens_out,
      total: ($tokens_in + $tokens_out)
    }
  } | with_entries(select(.value != null))'
)

# Append to log file
printf '%s\n' "$LOG_ENTRY" >> "$LOG_FILE"

# Always allow
exit 0