feat(docs): add LLM Handbook + Google Whitepaper integration v3.3.0
Advanced Guardrails:
- prompt-injection-detector.sh (PreToolUse)
- output-validator.sh (PostToolUse heuristics)
- claudemd-scanner.sh (SessionStart injection detection)
- output-secrets-scanner.sh (PostToolUse secrets leak prevention)

Observability & Monitoring:
- session-logger.sh (JSONL activity logging)
- session-stats.sh (cost tracking & analysis)
- guide/observability.md (full documentation)

LLM-as-a-Judge Evaluation:
- output-evaluator.md agent (Haiku)
- /validate-changes command
- pre-commit-evaluator.sh (opt-in git hook)

Google Agent Whitepaper Integration:
- Context Triage Guide (Section 2.2.4)
- CLAUDE.md Injection Warning (Section 3.1.3)
- Agent Validation Checklist (Section 4.2.4)
- MCP Security: Tool Shadowing & Confused Deputy (Section 8.6)
- Session vs Memory patterns (Section 3.3.3)

Stats: 10 new files, 8 modified, 5 new guide sections

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
19110eba22
commit
8a4d116e2e
17 changed files with 2188 additions and 3 deletions
184
examples/hooks/bash/output-validator.sh
Executable file
184
examples/hooks/bash/output-validator.sh
Executable file
|
|
@ -0,0 +1,184 @@
|
|||
#!/bin/bash
# Hook: PostToolUse - Validate Claude's outputs for quality issues
# Exit 0 = allow (always), but emit systemMessage warnings
#
# This hook performs heuristic validation of Claude's outputs to detect:
# - Potential hallucinations (fabricated paths, functions)
# - Sensitive data leakage in outputs
# - High uncertainty indicators
#
# This is a lightweight heuristic check, not a full LLM evaluation.
# For deeper validation, use the output-evaluator agent.
#
# Input:  hook payload as JSON on stdin (fields read: tool_name, tool_output,
#         with tool_response as a fallback).
# Output: when at least one heuristic fires, a single JSON object
#         {"systemMessage": "..."} on stdout; otherwise nothing.
# Requires: jq (payload parsing), grep, tr.
#
# Place in: .claude/hooks/output-validator.sh
# Register in: .claude/settings.json under PostToolUse event

# NOTE: `set -e` is intentionally NOT enabled. This hook must never fail the
# tool call it observes; with errexit, a failing jq or an arithmetic
# expression evaluating to 0 would terminate the hook with a non-zero status.
# Every fallible step below is checked explicitly instead.

# Warnings collected by validate_output and rendered by emit_warnings.
WARNINGS=()

# === FABRICATED FILE PATHS ===
# Literal substrings that look like placeholder paths.
SUSPICIOUS_PATHS=(
  "/path/to/"
  "/your/project/"
  "/example/"
  "/foo/bar/"
  "/my/app/"
  "/user/project/"
  'C:\Users\User\'
  'C:\path\to\'
)

# === PLACEHOLDER CONTENT ===
# Literal substrings that shouldn't be in production code.
PLACEHOLDER_PATTERNS=(
  "TODO:"
  "FIXME:"
  "XXX:"
  "HACK:"
  "your-api-key"
  "your_api_key"
  "YOUR_API_KEY"
  "sk-..."
  "pk_test_"
  "pk_live_"
  "api_key_here"
  "replace_with"
  "insert_your"
  "placeholder"
  "example.com"
  "foo@bar.com"
  "test@test.com"
)

# === SENSITIVE DATA LEAKAGE ===
# POSIX extended regular expressions (matched with grep -E).
SECRET_PATTERNS=(
  # AWS
  'AKIA[0-9A-Z]{16}'
  # Generic API keys (long hex strings)
  '[a-f0-9]{32,}'
  # Private keys
  '-----BEGIN.*PRIVATE KEY-----'
  '-----BEGIN RSA'
  '-----BEGIN EC'
  # JWT tokens
  'eyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.'
  # Password assignments with a single- or double-quoted value. Escapes such
  # as \x27 are literal characters inside an ERE bracket expression, so the
  # quote characters are spelled out and [[:space:]] replaces \s.
  "password[\"']?[[:space:]]*[=:][[:space:]]*[\"'][^\"']{8,}"
)

# === UNCERTAINTY INDICATORS ===
# Stored lowercase: they are compared against a lowercased copy of the text.
UNCERTAINTY_PATTERNS=(
  "i'm not sure"
  "i think it might"
  "probably"
  "possibly"
  "might be"
  "could be"
  "i believe"
  "i assume"
  "i guess"
  "if i recall"
  "from memory"
  "i don't have access"
  "i cannot verify"
)

# Number of distinct uncertainty phrases required before a warning is raised.
UNCERTAINTY_THRESHOLD=3

# === INCOMPLETE IMPLEMENTATIONS ===
# Extended regular expressions, matched case-insensitively.
INCOMPLETE_PATTERNS=(
  "not implemented"
  "NotImplementedError"
  "throw new Error.*implement"
  "// TODO"
  "# TODO"
  "pass # "
  "raise NotImplemented"
  "undefined"
)

# === HALLUCINATION INDICATORS ===
# Literal phrases that cite a source without naming it.
HALLUCINATION_PATTERNS=(
  "According to the documentation"
  "As stated in"
  "The official guide says"
  "Based on the API reference"
)

#######################################
# Escape a string for use inside a JSON string literal.
# Handles backslash, double quote, newline, carriage return and tab.
# Arguments: $1 - raw text
# Outputs:   escaped text on stdout (no trailing newline)
#######################################
json_escape() {
  local s=$1
  s=${s//\\/\\\\}   # backslashes first, so later escapes are not doubled
  s=${s//\"/\\\"}
  s=${s//$'\n'/\\n}
  s=${s//$'\r'/\\r}
  s=${s//$'\t'/\\t}
  printf '%s' "$s"
}

#######################################
# Run every heuristic against a piece of tool output.
# Globals:   WARNINGS (reset, then appended to); the *_PATTERNS arrays,
#            SUSPICIOUS_PATHS and UNCERTAINTY_THRESHOLD (read)
# Arguments: $1 - tool output text
# Returns:   0 always
#######################################
validate_output() {
  local text=$1
  local pattern lowered
  local uncertainty_count=0
  WARNINGS=()

  for pattern in "${SUSPICIOUS_PATHS[@]}"; do
    if [[ "$text" == *"$pattern"* ]]; then
      WARNINGS+=("Suspicious placeholder path detected: '$pattern'")
    fi
  done

  for pattern in "${PLACEHOLDER_PATTERNS[@]}"; do
    if [[ "$text" == *"$pattern"* ]]; then
      WARNINGS+=("Placeholder content detected: '$pattern'")
    fi
  done

  for pattern in "${SECRET_PATTERNS[@]}"; do
    # -e is required: several patterns start with '-' and would otherwise
    # be parsed by grep as command-line options.
    if grep -qE -e "$pattern" <<<"$text" 2>/dev/null; then
      WARNINGS+=("Potential sensitive data in output (pattern: ${pattern:0:20}...)")
    fi
  done

  lowered=$(printf '%s' "$text" | tr '[:upper:]' '[:lower:]')
  for pattern in "${UNCERTAINTY_PATTERNS[@]}"; do
    if [[ "$lowered" == *"$pattern"* ]]; then
      # Plain assignment: unlike ((count++)), its status is never non-zero.
      uncertainty_count=$((uncertainty_count + 1))
    fi
  done
  if [[ $uncertainty_count -ge $UNCERTAINTY_THRESHOLD ]]; then
    WARNINGS+=("High uncertainty detected ($uncertainty_count indicators) - verify output accuracy")
  fi

  for pattern in "${INCOMPLETE_PATTERNS[@]}"; do
    if grep -qiE -e "$pattern" <<<"$text" 2>/dev/null; then
      WARNINGS+=("Incomplete implementation detected: '$pattern'")
    fi
  done

  for pattern in "${HALLUCINATION_PATTERNS[@]}"; do
    if [[ "$text" == *"$pattern"* ]]; then
      WARNINGS+=("Unverified reference claim: '$pattern' - verify source")
    fi
  done

  return 0
}

#######################################
# Print collected warnings as one {"systemMessage": "..."} JSON object.
# Prints nothing when there are no warnings.
# Globals:   WARNINGS (read)
# Outputs:   JSON on stdout
#######################################
emit_warnings() {
  if [[ ${#WARNINGS[@]} -eq 0 ]]; then
    return 0
  fi

  local message="Output validation warnings:"$'\n'
  local warning
  for warning in "${WARNINGS[@]}"; do
    message+=" - ${warning}"$'\n'
  done
  message+=$'\n'"Review output carefully before accepting."

  # Warnings embed pattern text containing quotes and backslashes, so the
  # message is escaped before being placed in the JSON string.
  printf '{"systemMessage": "%s"}\n' "$(json_escape "$message")"
}

#######################################
# Entry point: read the hook payload from stdin, validate, emit warnings.
# Returns:   0 always (this hook warns, doesn't block)
#######################################
main() {
  local input tool_name tool_output

  # Run by hand with no piped payload: nothing to validate, and do not
  # block waiting on the terminal.
  if [[ -t 0 ]]; then
    return 0
  fi
  command -v jq >/dev/null 2>&1 || return 0

  input=$(cat) || return 0
  tool_name=$(jq -r '.tool_name // empty' <<<"$input" 2>/dev/null) || return 0

  # Only validate tools that produce code/content outputs
  case "$tool_name" in
    Edit|Write|Bash) ;;
    *) return 0 ;;
  esac

  # tool_output is kept as the primary field for backward compatibility.
  # NOTE(review): Claude Code's hook reference names the PostToolUse result
  # field tool_response, hence the fallback - confirm against the version in use.
  tool_output=$(jq -r '.tool_output // .tool_response // empty' \
    <<<"$input" 2>/dev/null) || return 0

  validate_output "$tool_output"
  emit_warnings
  return 0
}

# Always allow (this hook warns, doesn't block): main returns 0 on every path.
main "$@"
|
||||
Loading…
Add table
Add a link
Reference in a new issue