feat(docs): add LLM Handbook + Google Whitepaper integration v3.3.0
Advanced Guardrails: - prompt-injection-detector.sh (PreToolUse) - output-validator.sh (PostToolUse heuristics) - claudemd-scanner.sh (SessionStart injection detection) - output-secrets-scanner.sh (PostToolUse secrets leak prevention) Observability & Monitoring: - session-logger.sh (JSONL activity logging) - session-stats.sh (cost tracking & analysis) - guide/observability.md (full documentation) LLM-as-a-Judge Evaluation: - output-evaluator.md agent (Haiku) - /validate-changes command - pre-commit-evaluator.sh (opt-in git hook) Google Agent Whitepaper Integration: - Context Triage Guide (Section 2.2.4) - CLAUDE.md Injection Warning (Section 3.1.3) - Agent Validation Checklist (Section 4.2.4) - MCP Security: Tool Shadowing & Confused Deputy (Section 8.6) - Session vs Memory patterns (Section 3.3.3) Stats: 10 new files, 8 modified, 5 new guide sections Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
19110eba22
commit
8a4d116e2e
17 changed files with 2188 additions and 3 deletions
207
examples/hooks/bash/pre-commit-evaluator.sh
Executable file
207
examples/hooks/bash/pre-commit-evaluator.sh
Executable file
|
|
@ -0,0 +1,207 @@
|
|||
#!/bin/bash
|
||||
# Git pre-commit hook: LLM-as-a-Judge evaluation before commit
|
||||
#
|
||||
# This hook uses Claude to evaluate staged changes before allowing a commit.
|
||||
# It's an OPT-IN feature due to API costs and latency.
|
||||
#
|
||||
# COST WARNING: Each commit evaluation costs ~$0.01-0.05 (Haiku model)
|
||||
#
|
||||
# Installation:
|
||||
# 1. Copy to your repo: cp pre-commit-evaluator.sh .git/hooks/pre-commit
|
||||
# 2. Make executable: chmod +x .git/hooks/pre-commit
|
||||
# 3. Set required env var: export CLAUDE_PRECOMMIT_EVAL=1
|
||||
#
|
||||
# Environment Variables:
|
||||
# CLAUDE_PRECOMMIT_EVAL - Set to "1" to enable (default: disabled)
|
||||
# CLAUDE_EVAL_MODEL - Model to use (default: haiku)
|
||||
# CLAUDE_EVAL_THRESHOLD - Minimum score to pass (default: 7)
|
||||
# CLAUDE_EVAL_SKIP_PATHS - Colon-separated paths to skip (e.g., "docs:*.md")
|
||||
#
|
||||
# Bypass for single commit:
|
||||
# CLAUDE_SKIP_EVAL=1 git commit -m "message"
|
||||
# or
|
||||
# git commit --no-verify -m "message"
|
||||
|
||||
set -e
|
||||
|
||||
# Check if evaluation is enabled
|
||||
if [[ "${CLAUDE_PRECOMMIT_EVAL:-0}" != "1" ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Check for bypass
|
||||
if [[ "${CLAUDE_SKIP_EVAL:-0}" == "1" ]]; then
|
||||
echo "Skipping LLM evaluation (CLAUDE_SKIP_EVAL=1)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Configuration
|
||||
MODEL="${CLAUDE_EVAL_MODEL:-haiku}"
|
||||
THRESHOLD="${CLAUDE_EVAL_THRESHOLD:-7}"
|
||||
SKIP_PATHS="${CLAUDE_EVAL_SKIP_PATHS:-}"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
CYAN='\033[0;36m'
|
||||
NC='\033[0m'
|
||||
|
||||
# Check for staged changes
|
||||
STAGED_FILES=$(git diff --cached --name-only)
|
||||
if [[ -z "$STAGED_FILES" ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Filter out skipped paths
|
||||
if [[ -n "$SKIP_PATHS" ]]; then
|
||||
IFS=':' read -ra SKIP_ARRAY <<< "$SKIP_PATHS"
|
||||
FILTERED_FILES=""
|
||||
for file in $STAGED_FILES; do
|
||||
skip=false
|
||||
for pattern in "${SKIP_ARRAY[@]}"; do
|
||||
if [[ "$file" == $pattern ]]; then
|
||||
skip=true
|
||||
break
|
||||
fi
|
||||
done
|
||||
if [[ "$skip" == "false" ]]; then
|
||||
FILTERED_FILES="$FILTERED_FILES $file"
|
||||
fi
|
||||
done
|
||||
STAGED_FILES=$(echo "$FILTERED_FILES" | xargs)
|
||||
fi
|
||||
|
||||
# Exit if all files were filtered
|
||||
if [[ -z "$STAGED_FILES" ]]; then
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Count files
|
||||
FILE_COUNT=$(echo "$STAGED_FILES" | wc -w | tr -d ' ')
|
||||
|
||||
echo -e "${CYAN}Evaluating $FILE_COUNT staged file(s) with Claude ($MODEL)...${NC}"
|
||||
echo -e "${YELLOW}Cost: ~\$0.01-0.05 per evaluation${NC}"
|
||||
echo ""
|
||||
|
||||
# Get the diff
|
||||
DIFF=$(git diff --cached)
|
||||
|
||||
# Truncate diff if too large (to control costs)
|
||||
MAX_CHARS=50000
|
||||
if [[ ${#DIFF} -gt $MAX_CHARS ]]; then
|
||||
echo -e "${YELLOW}Warning: Diff truncated to ${MAX_CHARS} chars for cost control${NC}"
|
||||
DIFF="${DIFF:0:$MAX_CHARS}
|
||||
|
||||
[TRUNCATED - diff exceeded ${MAX_CHARS} characters]"
|
||||
fi
|
||||
|
||||
# Prepare the prompt
|
||||
PROMPT="You are a code quality evaluator. Analyze this git diff and provide a JSON evaluation.
|
||||
|
||||
Score each criterion from 0-10:
|
||||
- correctness: Does the code work correctly?
|
||||
- completeness: Is the implementation complete (no TODOs, stubs)?
|
||||
- safety: No secrets, no security issues?
|
||||
|
||||
Respond ONLY with valid JSON in this format:
|
||||
{
|
||||
\"verdict\": \"APPROVE\" or \"NEEDS_REVIEW\" or \"REJECT\",
|
||||
\"scores\": {\"correctness\": N, \"completeness\": N, \"safety\": N},
|
||||
\"issues\": [{\"severity\": \"high/medium/low\", \"description\": \"...\"}],
|
||||
\"summary\": \"One sentence summary\"
|
||||
}
|
||||
|
||||
Rules:
|
||||
- APPROVE if all scores >= $THRESHOLD and no high-severity issues
|
||||
- NEEDS_REVIEW if any score is 5-$((THRESHOLD-1)) or medium issues exist
|
||||
- REJECT if any score < 5 or high-severity security issues
|
||||
|
||||
Git diff to evaluate:
|
||||
|
||||
$DIFF"
|
||||
|
||||
# Call Claude (requires claude CLI to be installed and authenticated)
|
||||
if ! command -v claude &> /dev/null; then
|
||||
echo -e "${RED}Error: 'claude' CLI not found. Install Claude Code first.${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# Run evaluation
|
||||
RESULT=$(echo "$PROMPT" | claude --model "$MODEL" --print 2>/dev/null) || {
|
||||
echo -e "${RED}Error: Claude evaluation failed${NC}"
|
||||
echo "You can bypass with: CLAUDE_SKIP_EVAL=1 git commit"
|
||||
exit 1
|
||||
}
|
||||
|
||||
# Extract JSON from response (handle potential markdown wrapping)
|
||||
JSON_RESULT=$(echo "$RESULT" | grep -o '{.*}' | head -1)
|
||||
|
||||
if [[ -z "$JSON_RESULT" ]]; then
|
||||
echo -e "${YELLOW}Warning: Could not parse evaluation result${NC}"
|
||||
echo "Raw response: $RESULT"
|
||||
echo ""
|
||||
echo "Proceeding with commit (evaluation inconclusive)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
# Parse result
|
||||
VERDICT=$(echo "$JSON_RESULT" | jq -r '.verdict // "UNKNOWN"')
|
||||
CORRECTNESS=$(echo "$JSON_RESULT" | jq -r '.scores.correctness // 0')
|
||||
COMPLETENESS=$(echo "$JSON_RESULT" | jq -r '.scores.completeness // 0')
|
||||
SAFETY=$(echo "$JSON_RESULT" | jq -r '.scores.safety // 0')
|
||||
SUMMARY=$(echo "$JSON_RESULT" | jq -r '.summary // "No summary"')
|
||||
ISSUES=$(echo "$JSON_RESULT" | jq -r '.issues // []')
|
||||
|
||||
# Display results
|
||||
echo ""
|
||||
echo -e "${CYAN}═══════════════════════════════════════════════════════════${NC}"
|
||||
echo -e "${CYAN} Evaluation Results${NC}"
|
||||
echo -e "${CYAN}═══════════════════════════════════════════════════════════${NC}"
|
||||
echo ""
|
||||
echo " Correctness: $CORRECTNESS/10"
|
||||
echo " Completeness: $COMPLETENESS/10"
|
||||
echo " Safety: $SAFETY/10"
|
||||
echo ""
|
||||
echo " Summary: $SUMMARY"
|
||||
echo ""
|
||||
|
||||
# Show issues if any
|
||||
ISSUE_COUNT=$(echo "$ISSUES" | jq 'length')
|
||||
if [[ "$ISSUE_COUNT" -gt 0 ]]; then
|
||||
echo " Issues found:"
|
||||
echo "$ISSUES" | jq -r '.[] | " [\(.severity | ascii_upcase)] \(.description)"'
|
||||
echo ""
|
||||
fi
|
||||
|
||||
# Handle verdict
|
||||
case "$VERDICT" in
|
||||
APPROVE)
|
||||
echo -e "${GREEN}✓ APPROVED - Proceeding with commit${NC}"
|
||||
echo ""
|
||||
exit 0
|
||||
;;
|
||||
NEEDS_REVIEW)
|
||||
echo -e "${YELLOW}⚠ NEEDS_REVIEW - Issues detected${NC}"
|
||||
echo ""
|
||||
echo "Options:"
|
||||
echo " 1. Fix issues and try again"
|
||||
echo " 2. Bypass: CLAUDE_SKIP_EVAL=1 git commit"
|
||||
echo " 3. Skip hook: git commit --no-verify"
|
||||
echo ""
|
||||
exit 1
|
||||
;;
|
||||
REJECT)
|
||||
echo -e "${RED}✗ REJECTED - Critical issues found${NC}"
|
||||
echo ""
|
||||
echo "Please fix the issues before committing."
|
||||
echo "To force commit anyway: git commit --no-verify"
|
||||
echo ""
|
||||
exit 1
|
||||
;;
|
||||
*)
|
||||
echo -e "${YELLOW}? Unknown verdict: $VERDICT${NC}"
|
||||
echo "Proceeding with commit (evaluation inconclusive)"
|
||||
exit 0
|
||||
;;
|
||||
esac
|
||||
Loading…
Add table
Add a link
Reference in a new issue