claude-code-ultimate-guide/examples/hooks/bash/prompt-injection-detector.sh
Florian BRUNIAUX 34b2ca7200 feat(security): add security hardening guide and hooks v3.6.0
- Add guide/security-hardening.md (~10K) covering:
  - MCP vetting workflow with CVE-2025-53109/53110, 54135, 54136
  - Prompt injection evasion techniques (Unicode, ANSI, null bytes)
  - Secret detection tool comparison (Gitleaks, TruffleHog, GitGuardian)
  - Incident response procedures

- Add 3 new security hooks:
  - unicode-injection-scanner.sh: zero-width, RTL, ANSI escape detection
  - repo-integrity-scanner.sh: scan README/package.json for injection
  - mcp-config-integrity.sh: verify MCP config hash

- Update existing hooks:
  - prompt-injection-detector.sh: +ANSI, +null bytes, +nested cmd
  - output-secrets-scanner.sh: +env leakage, +generic tokens

- Update cross-references in ultimate-guide.md (§7.4, §8.6)
- Move MCP Security Hardening to Done in IDEAS.md

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-15 07:39:53 +01:00

214 lines
5.9 KiB
Bash
Executable file

#!/bin/bash
# Hook: PreToolUse - Detect prompt injection attempts
# Exit 0 = allow, Exit 2 = block (stderr message shown to Claude)
#
# This hook detects common prompt injection patterns that attempt to
# manipulate Claude's behavior through malicious instructions.
#
# Place in: .claude/hooks/prompt-injection-detector.sh
# Register in: .claude/settings.json under PreToolUse event
set -e
# Read JSON from stdin
INPUT=$(cat)
TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty')
TOOL_INPUT=$(echo "$INPUT" | jq -r '.tool_input // empty')
# Only check tools that handle user-provided text content
case "$TOOL_NAME" in
Bash|Write|Edit|WebFetch)
;;
*)
exit 0
;;
esac
# Extract content to analyze based on tool type
CONTENT=""
case "$TOOL_NAME" in
Bash)
CONTENT=$(echo "$TOOL_INPUT" | jq -r '.command // empty')
;;
Write|Edit)
CONTENT=$(echo "$TOOL_INPUT" | jq -r '.content // .new_string // empty')
;;
WebFetch)
CONTENT=$(echo "$TOOL_INPUT" | jq -r '.url // empty')
;;
esac
# Skip if no content to analyze
[[ -z "$CONTENT" ]] && exit 0
# Convert to lowercase for case-insensitive matching
CONTENT_LOWER=$(echo "$CONTENT" | tr '[:upper:]' '[:lower:]')
# === ROLE OVERRIDE PATTERNS ===
# Attempts to override Claude's instructions or identity
ROLE_OVERRIDE_PATTERNS=(
"ignore previous instructions"
"ignore all previous"
"ignore your instructions"
"disregard previous"
"disregard your instructions"
"forget your instructions"
"forget everything"
"you are now"
"act as if"
"pretend you are"
"pretend to be"
"from now on you"
"new instructions:"
"override:"
"system prompt:"
)
for pattern in "${ROLE_OVERRIDE_PATTERNS[@]}"; do
if [[ "$CONTENT_LOWER" == *"$pattern"* ]]; then
echo "BLOCKED: Prompt injection detected - role override attempt: '$pattern'" >&2
exit 2
fi
done
# === JAILBREAK PATTERNS ===
# Known jailbreak techniques and bypass attempts
JAILBREAK_PATTERNS=(
"dan mode"
"developer mode"
"jailbreak"
"do anything now"
"bypass"
"no restrictions"
"without restrictions"
"unrestricted mode"
"god mode"
"sudo mode"
"admin mode"
"debug mode"
"maintenance mode"
)
for pattern in "${JAILBREAK_PATTERNS[@]}"; do
if [[ "$CONTENT_LOWER" == *"$pattern"* ]]; then
echo "BLOCKED: Prompt injection detected - jailbreak attempt: '$pattern'" >&2
exit 2
fi
done
# === DELIMITER INJECTION ===
# Attempts to inject fake system messages or close prompt sections
DELIMITER_PATTERNS=(
"</system>"
"<|endoftext|>"
"<|im_end|>"
"[/INST]"
"[INST]"
"<<SYS>>"
"<</SYS>>"
"### System:"
"### Human:"
"### Assistant:"
"```system"
"SYSTEM:"
)
for pattern in "${DELIMITER_PATTERNS[@]}"; do
if [[ "$CONTENT" == *"$pattern"* ]]; then
echo "BLOCKED: Prompt injection detected - delimiter injection: '$pattern'" >&2
exit 2
fi
done
# === AUTHORITY IMPERSONATION ===
# Claims of special authority or permissions
AUTHORITY_PATTERNS=(
"anthropic employee"
"anthropic staff"
"i am your creator"
"i am your developer"
"i have admin access"
"authorized to bypass"
"emergency override"
"security exception"
"this is a test"
"testing mode"
)
for pattern in "${AUTHORITY_PATTERNS[@]}"; do
if [[ "$CONTENT_LOWER" == *"$pattern"* ]]; then
echo "BLOCKED: Prompt injection detected - authority impersonation: '$pattern'" >&2
exit 2
fi
done
# === BASE64 ENCODED INSTRUCTIONS ===
# Detect potential base64-encoded payloads (heuristic)
# Look for long base64-like strings that might contain instructions
if echo "$CONTENT" | grep -qE '[A-Za-z0-9+/]{50,}={0,2}'; then
# Try to decode and check for injection patterns
DECODED=$(echo "$CONTENT" | grep -oE '[A-Za-z0-9+/]{50,}={0,2}' | head -1 | base64 -d 2>/dev/null || true)
DECODED_LOWER=$(echo "$DECODED" | tr '[:upper:]' '[:lower:]')
for pattern in "ignore" "override" "system" "jailbreak" "dan mode"; do
if [[ "$DECODED_LOWER" == *"$pattern"* ]]; then
echo "BLOCKED: Prompt injection detected - encoded payload containing: '$pattern'" >&2
exit 2
fi
done
fi
# === ANSI ESCAPE SEQUENCES ===
# Terminal manipulation via escape codes (CVE-related)
# \x1b[ CSI, \x1b] OSC, \x1b( charset selection
if echo "$CONTENT" | grep -qE $'\x1b\[|\x1b\]|\x1b\('; then
echo "BLOCKED: ANSI escape sequence detected - potential terminal injection" >&2
exit 2
fi
# === NULL BYTE INJECTION ===
# Null bytes can truncate strings and bypass security checks
if echo "$CONTENT" | grep -qP '\x00'; then
echo "BLOCKED: Null byte detected - potential truncation attack" >&2
exit 2
fi
# === NESTED COMMAND EXECUTION ===
# Detect $() and backtick command substitution that could bypass denylists
# This catches patterns like: $(curl evil.com | bash) or `rm -rf /`
NESTED_CMD_PATTERNS=(
'\$\([^)]*\b(curl|wget|bash|sh|nc|python|ruby|perl|php)\b'
'`[^`]*\b(curl|wget|bash|sh|nc|python|ruby|perl|php)\b'
'\$\([^)]*\b(rm|dd|mkfs|chmod|chown)\b'
'`[^`]*\b(rm|dd|mkfs|chmod|chown)\b'
)
for pattern in "${NESTED_CMD_PATTERNS[@]}"; do
if echo "$CONTENT" | grep -qE "$pattern"; then
echo "BLOCKED: Nested command execution detected - potential bypass attempt" >&2
exit 2
fi
done
# === CONTEXT MANIPULATION ===
# Attempts to manipulate the conversation context
CONTEXT_PATTERNS=(
"in the previous message"
"as i mentioned earlier"
"you agreed to"
"you already said"
"you promised"
"remember when you"
"our agreement was"
)
for pattern in "${CONTEXT_PATTERNS[@]}"; do
if [[ "$CONTENT_LOWER" == *"$pattern"* ]]; then
# Warning only - these could be legitimate
echo '{"systemMessage": "Warning: Detected potential context manipulation pattern. Verify legitimacy."}'
fi
done
# Allow by default
exit 0