feat(security): add security hardening guide and hooks v3.6.0

- Add guide/security-hardening.md (~10K) covering: - MCP vetting workflow with CVE-2025-53109/53110, 54135, 54136 - Prompt injection evasion techniques (Unicode, ANSI, null bytes) - Secret detection tool comparison (Gitleaks, TruffleHog, GitGuardian) - Incident response procedures - Add 3 new security hooks: - unicode-injection-scanner.sh: zero-width, RTL, ANSI escape detection - repo-integrity-scanner.sh: scan README/package.json for injection - mcp-config-integrity.sh: verify MCP config hash - Update existing hooks: - prompt-injection-detector.sh: +ANSI, +null bytes, +nested cmd - output-secrets-scanner.sh: +env leakage, +generic tokens - Update cross-references in ultimate-guide.md (§7.4, §8.6) - Move MCP Security Hardening to Done in IDEAS.md Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-15 07:39:53 +01:00 · 2026-01-15 07:39:53 +01:00 · 34b2ca7200
commit 34b2ca7200
parent 55a9fa34cf
12 changed files with 986 additions and 22 deletions
--- a/examples/hooks/bash/unicode-injection-scanner.sh
+++ b/examples/hooks/bash/unicode-injection-scanner.sh
@ -0,0 +1,141 @@
+#!/bin/bash
+# =============================================================================
+# Unicode Injection Scanner Hook
+# =============================================================================
+# Event: PreToolUse (runs before Edit/Write operations)
+# Purpose: Detect invisible Unicode characters used for prompt injection
+#
+# This hook detects evasion techniques that embed invisible instructions:
+#   - Zero-width characters (U+200B-U+200D, U+FEFF)
+#   - RTL/LTR override (U+202A-U+202E, U+2066-U+2069)
+#   - ANSI escape sequences (terminal injection)
+#   - Null bytes (truncation attacks)
+#   - Tag characters (U+E0000-U+E007F)
+#
+# Installation:
+#   Add to .claude/settings.json:
+#   {
+#     "hooks": {
+#       "PreToolUse": [{
+#         "matcher": "Edit|Write",
+#         "hooks": ["bash examples/hooks/bash/unicode-injection-scanner.sh"]
+#       }]
+#     }
+#   }
+#
+# Exit codes:
+#   0 = allow (no injection detected)
+#   2 = block (injection detected, stderr message shown to Claude)
+#
+# References:
+#   - CVE-2025-53109/53110: Unicode-based sandbox escape
+#   - Arxiv 2509.22040: Prompt Injection on Coding Assistants
+# =============================================================================
+
+set -euo pipefail
+
+# Read the hook input from stdin
+INPUT=$(cat)
+
+TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty')
+TOOL_INPUT=$(echo "$INPUT" | jq -r '.tool_input // empty')
+
+# Only check Edit and Write tools
+case "$TOOL_NAME" in
+    Edit|Write)
+        ;;
+    *)
+        exit 0
+        ;;
+esac
+
+# Extract content to analyze
+CONTENT=""
+case "$TOOL_NAME" in
+    Write)
+        CONTENT=$(echo "$TOOL_INPUT" | jq -r '.content // empty')
+        ;;
+    Edit)
+        CONTENT=$(echo "$TOOL_INPUT" | jq -r '.new_string // empty')
+        ;;
+esac
+
+# Skip if no content
+[[ -z "$CONTENT" ]] && exit 0
+
+# === ZERO-WIDTH CHARACTERS ===
+# U+200B Zero Width Space        (UTF-8 bytes: E2 80 8B)
+# U+200C Zero Width Non-Joiner   (UTF-8 bytes: E2 80 8C)
+# U+200D Zero Width Joiner       (UTF-8 bytes: E2 80 8D)
+# U+FEFF Byte Order Mark         (UTF-8 bytes: EF BB BF) - flagged anywhere in
+#        the content, including at the very start
+#
+# The match is done on raw UTF-8 bytes with fixed strings in the C locale
+# instead of `grep -P '\x{...}'`: -P does not exist in BSD/macOS grep and needs
+# a UTF-8 locale in GNU grep, and a grep *error* inside `if` is treated as
+# "no match", which silently disables the check (fail-open).
+# A here-string replaces `echo ... | grep -q`: with `set -o pipefail`, grep -q
+# exiting early can kill the writer with SIGPIPE, making the pipeline report
+# failure even though grep matched.
+if LC_ALL=C grep -qF \
+    -e $'\xe2\x80\x8b' \
+    -e $'\xe2\x80\x8c' \
+    -e $'\xe2\x80\x8d' \
+    -e $'\xef\xbb\xbf' \
+    <<<"$CONTENT"; then
+    echo "BLOCKED: Zero-width characters detected (U+200B-U+200D or BOM). These can hide malicious instructions." >&2
+    exit 2
+fi
+
+# === BIDIRECTIONAL TEXT OVERRIDE ===
+# U+202A Left-to-Right Embedding    (UTF-8 bytes: E2 80 AA)
+# U+202B Right-to-Left Embedding    (UTF-8 bytes: E2 80 AB)
+# U+202C Pop Directional Formatting (UTF-8 bytes: E2 80 AC)
+# U+202D Left-to-Right Override     (UTF-8 bytes: E2 80 AD)
+# U+202E Right-to-Left Override     (UTF-8 bytes: E2 80 AE)
+#        (most dangerous - reverses text display)
+# U+2066-U+2069 Isolate controls    (UTF-8 bytes: E2 81 A6 .. E2 81 A9)
+#
+# Fixed-string byte matching in the C locale is used instead of `grep -P`:
+# -P is missing from BSD/macOS grep and locale-dependent in GNU grep, and a
+# grep error inside `if` would be read as "no match" (fail-open).
+# The here-string avoids `echo | grep -q`, which under `pipefail` can report
+# failure via SIGPIPE even when grep found a match.
+if LC_ALL=C grep -qF \
+    -e $'\xe2\x80\xaa' \
+    -e $'\xe2\x80\xab' \
+    -e $'\xe2\x80\xac' \
+    -e $'\xe2\x80\xad' \
+    -e $'\xe2\x80\xae' \
+    -e $'\xe2\x81\xa6' \
+    -e $'\xe2\x81\xa7' \
+    -e $'\xe2\x81\xa8' \
+    -e $'\xe2\x81\xa9' \
+    <<<"$CONTENT"; then
+    echo "BLOCKED: Bidirectional text override detected (U+202A-U+202E). These can disguise malicious commands." >&2
+    exit 2
+fi
+
+# === ANSI ESCAPE SEQUENCES ===
+# \x1b[ CSI (Control Sequence Introducer) - terminal control
+# \x1b] OSC (Operating System Command)
+# \x1b( Character set selection
+# These can manipulate terminal display or execute commands
+#
+# Matched in-shell with `case` rather than `echo ... | grep -qE`: under
+# `set -o pipefail` the writer can be killed by SIGPIPE when grep -q exits on
+# an early match in large content, which makes the pipeline look like a
+# non-match and lets the content through. The $'...' words are quoted, so the
+# bracket/paren after ESC is a literal character, not a glob.
+case "$CONTENT" in
+    *$'\x1b['* | *$'\x1b]'* | *$'\x1b('*)
+        echo "BLOCKED: ANSI escape sequence detected. These can manipulate terminal display." >&2
+        exit 2
+        ;;
+esac
+
+# === NULL BYTES ===
+# \x00 can truncate strings and bypass security checks
+if echo "$CONTENT" | grep -qP '\x00'; then
+    echo "BLOCKED: Null byte detected. These can cause string truncation attacks." >&2
+    exit 2
+fi
+
+# === TAG CHARACTERS ===
+# U+E0000-U+E007F are invisible "tag" characters
+# Sometimes used to embed hidden data
+if echo "$CONTENT" | grep -qP '[\x{E0000}-\x{E007F}]'; then
+    echo "BLOCKED: Unicode tag characters detected (U+E0000-E007F). These can embed invisible data." >&2
+    exit 2
+fi
+
+# === OVERLONG UTF-8 SEQUENCES ===
+# Detect potential overlong encodings (e.g., encoding '/' as C0 AF instead of 2F)
+# These can bypass path filters
+#
+# Bytes C0 and C1 can only start an overlong 2-byte sequence; they never occur
+# in valid UTF-8, so their mere presence is the signal.
+# The scan runs on the raw hook payload ($INPUT) in the C locale because:
+#   - with `grep -P` in a UTF-8 locale, \xC0/\xC1 mean the code points
+#     U+00C0/U+00C1, so ordinary text such as "Á»" was blocked while real
+#     overlong bytes were not matched;
+#   - jq appears to replace invalid UTF-8 with U+FFFD while decoding, so the
+#     offending bytes would no longer be present in $CONTENT
+#     (NOTE(review): confirm for the jq version in use).
+# The here-string avoids the `echo | grep -q` SIGPIPE/pipefail false negative.
+if LC_ALL=C grep -qF -e $'\xc0' -e $'\xc1' <<<"$INPUT"; then
+    echo "BLOCKED: Overlong UTF-8 sequence detected. These can bypass security filters." >&2
+    exit 2
+fi
+
+# === HOMOGLYPHS WARNING ===
+# Detect Cyrillic characters that look like Latin (confusables)
+# Common in typosquatting and filter bypass
+# а (U+0430) vs a, е (U+0435) vs e, о (U+043E) vs o, etc.
+#
+# All matching is done on UTF-8 bytes in the C locale with here-strings:
+# `grep -P` is unavailable in BSD/macOS grep and locale-dependent in GNU grep,
+# and `echo | grep -q` can report a false "no match" under `pipefail` when the
+# writer is killed by SIGPIPE.
+HOMOGLYPHS_FOUND=false
+
+# Cyrillic confusables: U+0430 (D0 B0), U+0435 (D0 B5), U+043E (D0 BE),
+# U+0440 (D1 80), U+0441 (D1 81), U+0445 (D1 85).
+if LC_ALL=C grep -qF \
+    -e $'\xd0\xb0' \
+    -e $'\xd0\xb5' \
+    -e $'\xd0\xbe' \
+    -e $'\xd1\x80' \
+    -e $'\xd1\x81' \
+    -e $'\xd1\x85' \
+    <<<"$CONTENT"; then
+    HOMOGLYPHS_FOUND=true
+fi
+
+# Greek U+0391-U+03C9 is CE 91..CE BF plus CF 80..CF 89 in UTF-8; it only
+# counts when ASCII Latin letters are present in the same content.
+if LC_ALL=C grep -q -e $'\xce[\x91-\xbf]' -e $'\xcf[\x80-\x89]' <<<"$CONTENT" \
+    && LC_ALL=C grep -q '[a-zA-Z]' <<<"$CONTENT"; then
+    # Greek mixed with Latin
+    HOMOGLYPHS_FOUND=true
+fi
+
+if [[ "$HOMOGLYPHS_FOUND" == "true" ]]; then
+    # Warning only - could be legitimate multilingual content.
+    # Emitted as JSON on stdout; the script still exits 0 below.
+    echo '{"systemMessage": "Warning: Potential homoglyph characters detected (Cyrillic/Greek mixed with Latin). Verify this is not an attempt to bypass filters."}'
+fi
+
+# All checks passed
+exit 0