claude-code-ultimate-guide/examples/hooks/bash/pre-commit-evaluator.sh
Florian BRUNIAUX 8a4d116e2e feat(docs): add LLM Handbook + Google Whitepaper integration v3.3.0
Advanced Guardrails:
- prompt-injection-detector.sh (PreToolUse)
- output-validator.sh (PostToolUse heuristics)
- claudemd-scanner.sh (SessionStart injection detection)
- output-secrets-scanner.sh (PostToolUse secrets leak prevention)

Observability & Monitoring:
- session-logger.sh (JSONL activity logging)
- session-stats.sh (cost tracking & analysis)
- guide/observability.md (full documentation)

LLM-as-a-Judge Evaluation:
- output-evaluator.md agent (Haiku)
- /validate-changes command
- pre-commit-evaluator.sh (opt-in git hook)

Google Agent Whitepaper Integration:
- Context Triage Guide (Section 2.2.4)
- CLAUDE.md Injection Warning (Section 3.1.3)
- Agent Validation Checklist (Section 4.2.4)
- MCP Security: Tool Shadowing & Confused Deputy (Section 8.6)
- Session vs Memory patterns (Section 3.3.3)

Stats: 10 new files, 8 modified, 5 new guide sections

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-14 21:00:49 +01:00

207 lines
6.3 KiB
Bash
Executable file

#!/bin/bash
# Git pre-commit hook: LLM-as-a-Judge evaluation before commit
#
# This hook uses Claude to evaluate staged changes before allowing a commit.
# It's an OPT-IN feature due to API costs and latency.
#
# COST WARNING: Each commit evaluation costs ~$0.01-0.05 (Haiku model)
#
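# Requirements:
# - The `claude` CLI, installed and authenticated
# - `jq` on PATH (used to parse the evaluation result)
#
# Installation:
# 1. Copy to your repo: cp pre-commit-evaluator.sh .git/hooks/pre-commit
# 2. Make executable: chmod +x .git/hooks/pre-commit
# 3. Set required env var: export CLAUDE_PRECOMMIT_EVAL=1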
#
# Environment Variables:
# CLAUDE_PRECOMMIT_EVAL - Set to "1" to enable (default: disabled)
# CLAUDE_EVAL_MODEL - Model to use (default: haiku)
# CLAUDE_EVAL_THRESHOLD - Minimum score to pass (default: 7)
# CLAUDE_EVAL_SKIP_PATHS - Colon-separated paths to skip (e.g., "docs:*.md")
#
# Bypass for single commit:
# CLAUDE_SKIP_EVAL=1 git commit -m "message"
# or
# git commit --no-verify -m "message"
set -e

# ANSI color escapes used for hook output (expanded by printf '%b').
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
CYAN='\033[0;36m'
NC='\033[0m'

#######################################
# Print one line of text wrapped in a color escape.
# The text is emitted with '%s', so backslashes coming from the model or
# from file names are never interpreted as escape sequences.
# Globals:   NC (read)
# Arguments: $1 - color escape, $2 - text
# Outputs:   colored line on stdout
#######################################
print_colored() {
  printf '%b%s%b\n' "$1" "$2" "$NC"
}

#######################################
# Decide whether a staged path is covered by one of the skip patterns.
# A pattern matches when it globs the whole path (e.g. "*.md") or when it
# names a directory that contains the path (e.g. "docs" or "docs/").
# Arguments: $1 - path, remaining args - glob patterns
# Returns:   0 if the path must be skipped, 1 otherwise
#######################################
path_is_skipped() {
  local path=$1 pattern
  shift
  for pattern in "$@"; do
    pattern=${pattern%/}
    # Empty entries (e.g. from "a::b") must not match anything.
    [[ -n "$pattern" ]] || continue
    # shellcheck disable=SC2053 # $pattern is intentionally used as a glob
    if [[ "$path" == $pattern || "$path" == $pattern/* ]]; then
      return 0
    fi
  done
  return 1
}

#######################################
# Extract the span from the first '{' to the last '}' of a response.
# Works across newlines, so pretty-printed JSON and JSON wrapped in a
# markdown fence are both handled. The span is NOT validated here.
# Arguments: $1 - raw model response
# Outputs:   candidate JSON text on stdout
# Returns:   0 if a span was found, 1 otherwise
#######################################
extract_json_span() {
  local text=$1 open='{' close='}' span
  [[ "$text" == *"$open"*"$close"* ]] || return 1
  span=${text#*"$open"}     # drop everything up to the first '{'
  span=${span%"$close"*}    # drop everything from the last '}'
  printf '%s\n' "${open}${span}${close}"
}

#######################################
# Emit the evaluation prompt for a diff.
# Arguments: $1 - approval threshold
#            $2 - upper bound of the NEEDS_REVIEW score range
#            $3 - diff text
# Outputs:   prompt on stdout
#######################################
build_prompt() {
  local threshold=$1 review_upper=$2 staged_diff=$3
  # Unquoted delimiter: variables are expanded once; the diff content
  # itself is not re-scanned for expansions.
  cat <<EOF
You are a code quality evaluator. Analyze this git diff and provide a JSON evaluation.
Score each criterion from 0-10:
- correctness: Does the code work correctly?
- completeness: Is the implementation complete (no TODOs, stubs)?
- safety: No secrets, no security issues?
Respond ONLY with valid JSON in this format:
{
"verdict": "APPROVE" or "NEEDS_REVIEW" or "REJECT",
"scores": {"correctness": N, "completeness": N, "safety": N},
"issues": [{"severity": "high/medium/low", "description": "..."}],
"summary": "One sentence summary"
}
Rules:
- APPROVE if all scores >= $threshold and no high-severity issues
- NEEDS_REVIEW if any score is 5-$review_upper or medium issues exist
- REJECT if any score < 5 or high-severity security issues
Git diff to evaluate:
$staged_diff
EOF
}

#######################################
# Hook entry point: evaluate the staged diff with Claude.
# Globals:   CLAUDE_PRECOMMIT_EVAL, CLAUDE_SKIP_EVAL, CLAUDE_EVAL_MODEL,
#            CLAUDE_EVAL_THRESHOLD, CLAUDE_EVAL_SKIP_PATHS (read)
# Returns:   0 to allow the commit, non-zero to block it
#######################################
main() {
  # Opt-in feature: do nothing unless explicitly enabled.
  if [[ "${CLAUDE_PRECOMMIT_EVAL:-0}" != "1" ]]; then
    return 0
  fi

  # Per-commit bypass.
  if [[ "${CLAUDE_SKIP_EVAL:-0}" == "1" ]]; then
    echo "Skipping LLM evaluation (CLAUDE_SKIP_EVAL=1)"
    return 0
  fi

  local model="${CLAUDE_EVAL_MODEL:-haiku}"
  local threshold="${CLAUDE_EVAL_THRESHOLD:-7}"
  local skip_paths="${CLAUDE_EVAL_SKIP_PATHS:-}"
  local max_chars=50000

  # The threshold is used in arithmetic below; reject anything that is
  # not a plain non-negative integer instead of failing mid-script.
  if ! [[ "$threshold" =~ ^[0-9]+$ ]]; then
    print_colored "$YELLOW" \
      "Warning: CLAUDE_EVAL_THRESHOLD='$threshold' is not a whole number; using 7"
    threshold=7
  fi
  # 10# forces base 10 so values such as "08" are not parsed as octal.
  local review_upper=$(( 10#$threshold - 1 ))

  # Abort (via set -e) if git cannot operate here; the file list below is
  # read through process substitution, which would hide such a failure.
  git rev-parse --git-dir > /dev/null

  # NUL-delimited listing keeps paths with spaces or special characters intact.
  local -a staged=() kept=() patterns=()
  local path
  while IFS= read -r -d '' path; do
    staged+=("$path")
  done < <(git diff --cached --name-only -z)
  if (( ${#staged[@]} == 0 )); then
    return 0
  fi

  # Without a filter the whole staged diff is evaluated.
  local -a diff_cmd=(git diff --cached)
  if [[ -n "$skip_paths" ]]; then
    IFS=':' read -ra patterns <<< "$skip_paths"
    for path in "${staged[@]}"; do
      if ! path_is_skipped "$path" "${patterns[@]}"; then
        kept+=("$path")
      fi
    done
    # Everything was filtered out: nothing to evaluate.
    if (( ${#kept[@]} == 0 )); then
      return 0
    fi
    # Restrict the diff to the kept files so skipped paths are neither
    # sent to the model nor billed. Literal pathspecs stop git from
    # treating glob characters in file names as patterns.
    if (( ${#kept[@]} < ${#staged[@]} )); then
      diff_cmd=(git --literal-pathspecs diff --cached -- "${kept[@]}")
    fi
    staged=("${kept[@]}")
  fi

  print_colored "$CYAN" \
    "Evaluating ${#staged[@]} staged file(s) with Claude ($model)..."
  print_colored "$YELLOW" 'Cost: ~$0.01-0.05 per evaluation'
  echo ""

  local staged_diff
  staged_diff=$("${diff_cmd[@]}")

  # Truncate oversized diffs to keep the evaluation cost bounded.
  if (( ${#staged_diff} > max_chars )); then
    print_colored "$YELLOW" \
      "Warning: Diff truncated to ${max_chars} chars for cost control"
    staged_diff="${staged_diff:0:$max_chars}"$'\n'"[TRUNCATED - diff exceeded ${max_chars} characters]"
  fi

  local prompt
  prompt=$(build_prompt "$threshold" "$review_upper" "$staged_diff")

  # External tools: claude performs the evaluation, jq parses its answer.
  if ! command -v claude &> /dev/null; then
    print_colored "$RED" "Error: 'claude' CLI not found. Install Claude Code first."
    return 1
  fi
  if ! command -v jq &> /dev/null; then
    print_colored "$RED" "Error: 'jq' not found. It is required to parse the evaluation result."
    return 1
  fi

  local result
  result=$(printf '%s\n' "$prompt" | claude --model "$model" --print 2>/dev/null) || {
    print_colored "$RED" "Error: Claude evaluation failed"
    echo "You can bypass with: CLAUDE_SKIP_EVAL=1 git commit"
    return 1
  }

  # Pull the JSON object out of the response and make sure it really is
  # a JSON object before any field is read from it.
  local json
  json=$(extract_json_span "$result") || json=""
  if [[ -n "$json" ]] \
    && ! jq -e 'type == "object"' <<< "$json" > /dev/null 2>&1; then
    json=""
  fi
  if [[ -z "$json" ]]; then
    print_colored "$YELLOW" "Warning: Could not parse evaluation result"
    printf 'Raw response: %s\n' "$result"
    echo ""
    echo "Proceeding with commit (evaluation inconclusive)"
    return 0
  fi

  # Field access is defensive ('?' and type checks) so an oddly shaped
  # but valid JSON answer cannot make jq fail and abort under set -e.
  local verdict correctness completeness safety summary issues issue_count
  verdict=$(jq -r '(.verdict // "UNKNOWN") | tostring | ascii_upcase' <<< "$json")
  correctness=$(jq -r '.scores.correctness? // 0' <<< "$json")
  completeness=$(jq -r '.scores.completeness? // 0' <<< "$json")
  safety=$(jq -r '.scores.safety? // 0' <<< "$json")
  summary=$(jq -r '.summary // "No summary"' <<< "$json")
  issues=$(jq -c '.issues | if type == "array" then . else [] end' <<< "$json")
  issue_count=$(jq 'length' <<< "$issues")

  echo ""
  print_colored "$CYAN" "═══════════════════════════════════════════════════════════"
  print_colored "$CYAN" " Evaluation Results"
  print_colored "$CYAN" "═══════════════════════════════════════════════════════════"
  echo ""
  printf ' Correctness: %s/10\n' "$correctness"
  printf ' Completeness: %s/10\n' "$completeness"
  printf ' Safety: %s/10\n' "$safety"
  echo ""
  printf ' Summary: %s\n' "$summary"
  echo ""

  if (( issue_count > 0 )); then
    echo " Issues found:"
    jq -r '.[]
      | if type == "object"
        then " [\((.severity // "unknown") | tostring | ascii_upcase)] \(.description // "")"
        else " [UNKNOWN] \(tostring)"
        end' <<< "$issues"
    echo ""
  fi

  case "$verdict" in
    APPROVE)
      print_colored "$GREEN" "✓ APPROVED - Proceeding with commit"
      echo ""
      return 0
      ;;
    NEEDS_REVIEW)
      print_colored "$YELLOW" "⚠ NEEDS_REVIEW - Issues detected"
      echo ""
      echo "Options:"
      echo " 1. Fix issues and try again"
      echo " 2. Bypass: CLAUDE_SKIP_EVAL=1 git commit"
      echo " 3. Skip hook: git commit --no-verify"
      echo ""
      return 1
      ;;
    REJECT)
      print_colored "$RED" "✗ REJECTED - Critical issues found"
      echo ""
      echo "Please fix the issues before committing."
      echo "To force commit anyway: git commit --no-verify"
      echo ""
      return 1
      ;;
    *)
      print_colored "$YELLOW" "? Unknown verdict: $verdict"
      echo "Proceeding with commit (evaluation inconclusive)"
      return 0
      ;;
  esac
}

# As the last command, main's return status becomes the hook's exit status.
main "$@"