feat(docs): add LLM Handbook + Google Whitepaper integration v3.3.0

Advanced Guardrails: - prompt-injection-detector.sh (PreToolUse) - output-validator.sh (PostToolUse heuristics) - claudemd-scanner.sh (SessionStart injection detection) - output-secrets-scanner.sh (PostToolUse secrets leak prevention) Observability & Monitoring: - session-logger.sh (JSONL activity logging) - session-stats.sh (cost tracking & analysis) - guide/observability.md (full documentation) LLM-as-a-Judge Evaluation: - output-evaluator.md agent (Haiku) - /validate-changes command - pre-commit-evaluator.sh (opt-in git hook) Google Agent Whitepaper Integration: - Context Triage Guide (Section 2.2.4) - CLAUDE.md Injection Warning (Section 3.1.3) - Agent Validation Checklist (Section 4.2.4) - MCP Security: Tool Shadowing & Confused Deputy (Section 8.6) - Session vs Memory patterns (Section 3.3.3) Stats: 10 new files, 8 modified, 5 new guide sections Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 21:00:49 +01:00 · 2026-01-14 21:00:49 +01:00 · 8a4d116e2e
commit 8a4d116e2e
parent 19110eba22
17 changed files with 2188 additions and 3 deletions
--- a/examples/hooks/bash/output-validator.sh
+++ b/examples/hooks/bash/output-validator.sh
@ -0,0 +1,184 @@
+#!/bin/bash
+# Hook: PostToolUse - Validate Claude's outputs for quality issues
+# Exit 0 = allow (always), but emit systemMessage warnings
+#
+# This hook performs heuristic validation of Claude's outputs to detect:
+# - Potential hallucinations (fabricated paths, functions)
+# - Sensitive data leakage in outputs
+# - High uncertainty indicators
+#
+# This is a lightweight heuristic check, not a full LLM evaluation.
+# For deeper validation, use the output-evaluator agent.
+#
+# Place in: .claude/hooks/output-validator.sh
+# Register in: .claude/settings.json under PostToolUse event
+
+set -e
+
+# Read JSON from stdin
+INPUT=$(cat)
+
+TOOL_NAME=$(echo "$INPUT" | jq -r '.tool_name // empty')
+TOOL_OUTPUT=$(echo "$INPUT" | jq -r '.tool_output // empty')
+
+# Only validate tools that produce code/content outputs
+case "$TOOL_NAME" in
+    Edit|Write|Bash)
+        ;;
+    *)
+        exit 0
+        ;;
+esac
+
+WARNINGS=()
+
+# === FABRICATED FILE PATHS ===
+# Detect paths that look suspicious (common hallucination patterns)
+SUSPICIOUS_PATHS=(
+    "/path/to/"
+    "/your/project/"
+    "/example/"
+    "/foo/bar/"
+    "/my/app/"
+    "/user/project/"
+    "C:\\Users\\User\\"
+    "C:\\path\\to\\"
+)
+
+for pattern in "${SUSPICIOUS_PATHS[@]}"; do
+    if [[ "$TOOL_OUTPUT" == *"$pattern"* ]]; then
+        WARNINGS+=("Suspicious placeholder path detected: '$pattern'")
+    fi
+done
+
+# === PLACEHOLDER CONTENT ===
+# Detect common placeholder patterns that shouldn't be in production code
+PLACEHOLDER_PATTERNS=(
+    "TODO:"
+    "FIXME:"
+    "XXX:"
+    "HACK:"
+    "your-api-key"
+    "your_api_key"
+    "YOUR_API_KEY"
+    "sk-..."
+    "pk_test_"
+    "pk_live_"
+    "api_key_here"
+    "replace_with"
+    "insert_your"
+    "placeholder"
+    "example.com"
+    "foo@bar.com"
+    "test@test.com"
+)
+
+for pattern in "${PLACEHOLDER_PATTERNS[@]}"; do
+    if [[ "$TOOL_OUTPUT" == *"$pattern"* ]]; then
+        WARNINGS+=("Placeholder content detected: '$pattern'")
+    fi
+done
+
+# === SENSITIVE DATA LEAKAGE ===
+# Detect potential secrets in output (could indicate data exposure)
+SECRET_PATTERNS=(
+    # AWS
+    'AKIA[0-9A-Z]{16}'
+    # Generic API keys (long hex strings)
+    '[a-f0-9]{32,}'
+    # Private keys
+    '-----BEGIN.*PRIVATE KEY-----'
+    '-----BEGIN RSA'
+    '-----BEGIN EC'
+    # JWT tokens
+    'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.'
+    # Password patterns
+    'password["\x27]?\s*[=:]\s*["\x27][^"\x27]{8,}'
+)
+
+for pattern in "${SECRET_PATTERNS[@]}"; do
+    if echo "$TOOL_OUTPUT" | grep -qE "$pattern" 2>/dev/null; then
+        WARNINGS+=("Potential sensitive data in output (pattern: ${pattern:0:20}...)")
+    fi
+done
+
+# === UNCERTAINTY INDICATORS ===
+# Detect high uncertainty language that might indicate guessing
+UNCERTAINTY_PATTERNS=(
+    "I'm not sure"
+    "I think it might"
+    "probably"
+    "possibly"
+    "might be"
+    "could be"
+    "I believe"
+    "I assume"
+    "I guess"
+    "if I recall"
+    "from memory"
+    "I don't have access"
+    "I cannot verify"
+)
+
+UNCERTAINTY_COUNT=0
+TOOL_OUTPUT_LOWER=$(echo "$TOOL_OUTPUT" | tr '[:upper:]' '[:lower:]')
+
+for pattern in "${UNCERTAINTY_PATTERNS[@]}"; do
+    pattern_lower=$(echo "$pattern" | tr '[:upper:]' '[:lower:]')
+    if [[ "$TOOL_OUTPUT_LOWER" == *"$pattern_lower"* ]]; then
+        ((UNCERTAINTY_COUNT++))
+    fi
+done
+
+if [[ $UNCERTAINTY_COUNT -ge 3 ]]; then
+    WARNINGS+=("High uncertainty detected ($UNCERTAINTY_COUNT indicators) - verify output accuracy")
+fi
+
+# === INCOMPLETE IMPLEMENTATIONS ===
+# Detect code that looks incomplete
+INCOMPLETE_PATTERNS=(
+    "not implemented"
+    "NotImplementedError"
+    "throw new Error.*implement"
+    "// TODO"
+    "# TODO"
+    "pass  # "
+    "raise NotImplemented"
+    "undefined"
+)
+
+for pattern in "${INCOMPLETE_PATTERNS[@]}"; do
+    if echo "$TOOL_OUTPUT" | grep -qiE "$pattern" 2>/dev/null; then
+        WARNINGS+=("Incomplete implementation detected: '$pattern'")
+    fi
+done
+
+# === HALLUCINATION INDICATORS ===
+# Detect patterns that often indicate hallucinated content
+HALLUCINATION_PATTERNS=(
+    "According to the documentation"
+    "As stated in"
+    "The official guide says"
+    "Based on the API reference"
+)
+
+for pattern in "${HALLUCINATION_PATTERNS[@]}"; do
+    if [[ "$TOOL_OUTPUT" == *"$pattern"* ]]; then
+        WARNINGS+=("Unverified reference claim: '$pattern' - verify source")
+    fi
+done
+
+# === OUTPUT WARNINGS ===
+if [[ ${#WARNINGS[@]} -gt 0 ]]; then
+    WARNING_MSG="Output validation warnings:\\n"
+    for warn in "${WARNINGS[@]}"; do
+        WARNING_MSG+="  - $warn\\n"
+    done
+    WARNING_MSG+="\\nReview output carefully before accepting."
+
+    # Emit as systemMessage (warning, not blocking)
+    echo "{\"systemMessage\": \"$WARNING_MSG\"}"
+fi
+
+# Always allow (this hook warns, doesn't block)
+exit 0