Advanced Guardrails: - prompt-injection-detector.sh (PreToolUse) - output-validator.sh (PostToolUse heuristics) - claudemd-scanner.sh (SessionStart injection detection) - output-secrets-scanner.sh (PostToolUse secrets leak prevention) Observability & Monitoring: - session-logger.sh (JSONL activity logging) - session-stats.sh (cost tracking & analysis) - guide/observability.md (full documentation) LLM-as-a-Judge Evaluation: - output-evaluator.md agent (Haiku) - /validate-changes command - pre-commit-evaluator.sh (opt-in git hook) Google Agent Whitepaper Integration: - Context Triage Guide (Section 2.2.4) - CLAUDE.md Injection Warning (Section 3.1.3) - Agent Validation Checklist (Section 4.2.4) - MCP Security: Tool Shadowing & Confused Deputy (Section 8.6) - Session vs Memory patterns (Section 3.3.3) Stats: 10 new files, 8 modified, 5 new guide sections Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
207 lines
6.3 KiB
Bash
Executable file
207 lines
6.3 KiB
Bash
Executable file
#!/bin/bash
|
|
# Git pre-commit hook: LLM-as-a-Judge evaluation before commit
|
|
#
|
|
# This hook uses Claude to evaluate staged changes before allowing a commit.
|
|
# It's an OPT-IN feature due to API costs and latency.
|
|
#
|
|
# COST WARNING: Each commit evaluation costs ~$0.01-0.05 (Haiku model)
|
|
#
|
|
# Installation:
|
|
# 1. Copy to your repo: cp pre-commit-evaluator.sh .git/hooks/pre-commit
|
|
# 2. Make executable: chmod +x .git/hooks/pre-commit
|
|
# 3. Set required env var: export CLAUDE_PRECOMMIT_EVAL=1
|
|
#
|
|
# Environment Variables:
|
|
# CLAUDE_PRECOMMIT_EVAL - Set to "1" to enable (default: disabled)
|
|
# CLAUDE_EVAL_MODEL - Model to use (default: haiku)
|
|
# CLAUDE_EVAL_THRESHOLD - Minimum score to pass (default: 7)
|
|
# CLAUDE_EVAL_SKIP_PATHS - Colon-separated paths to skip (e.g., "docs:*.md")
|
|
#
|
|
# Bypass for single commit:
|
|
# CLAUDE_SKIP_EVAL=1 git commit -m "message"
|
|
# or
|
|
# git commit --no-verify -m "message"
|
|
|
|
set -e
|
|
|
|
# Check if evaluation is enabled
|
|
if [[ "${CLAUDE_PRECOMMIT_EVAL:-0}" != "1" ]]; then
|
|
exit 0
|
|
fi
|
|
|
|
# Check for bypass
|
|
if [[ "${CLAUDE_SKIP_EVAL:-0}" == "1" ]]; then
|
|
echo "Skipping LLM evaluation (CLAUDE_SKIP_EVAL=1)"
|
|
exit 0
|
|
fi
|
|
|
|
# Configuration
|
|
MODEL="${CLAUDE_EVAL_MODEL:-haiku}"
|
|
THRESHOLD="${CLAUDE_EVAL_THRESHOLD:-7}"
|
|
SKIP_PATHS="${CLAUDE_EVAL_SKIP_PATHS:-}"
|
|
|
|
# Colors
|
|
RED='\033[0;31m'
|
|
GREEN='\033[0;32m'
|
|
YELLOW='\033[1;33m'
|
|
CYAN='\033[0;36m'
|
|
NC='\033[0m'
|
|
|
|
# Check for staged changes
|
|
STAGED_FILES=$(git diff --cached --name-only)
|
|
if [[ -z "$STAGED_FILES" ]]; then
|
|
exit 0
|
|
fi
|
|
|
|
# Filter out skipped paths
|
|
if [[ -n "$SKIP_PATHS" ]]; then
|
|
IFS=':' read -ra SKIP_ARRAY <<< "$SKIP_PATHS"
|
|
FILTERED_FILES=""
|
|
for file in $STAGED_FILES; do
|
|
skip=false
|
|
for pattern in "${SKIP_ARRAY[@]}"; do
|
|
if [[ "$file" == $pattern ]]; then
|
|
skip=true
|
|
break
|
|
fi
|
|
done
|
|
if [[ "$skip" == "false" ]]; then
|
|
FILTERED_FILES="$FILTERED_FILES $file"
|
|
fi
|
|
done
|
|
STAGED_FILES=$(echo "$FILTERED_FILES" | xargs)
|
|
fi
|
|
|
|
# Exit if all files were filtered
|
|
if [[ -z "$STAGED_FILES" ]]; then
|
|
exit 0
|
|
fi
|
|
|
|
# Count files
|
|
FILE_COUNT=$(echo "$STAGED_FILES" | wc -w | tr -d ' ')
|
|
|
|
echo -e "${CYAN}Evaluating $FILE_COUNT staged file(s) with Claude ($MODEL)...${NC}"
|
|
echo -e "${YELLOW}Cost: ~\$0.01-0.05 per evaluation${NC}"
|
|
echo ""
|
|
|
|
# Get the diff
|
|
DIFF=$(git diff --cached)
|
|
|
|
# Truncate diff if too large (to control costs)
|
|
MAX_CHARS=50000
|
|
if [[ ${#DIFF} -gt $MAX_CHARS ]]; then
|
|
echo -e "${YELLOW}Warning: Diff truncated to ${MAX_CHARS} chars for cost control${NC}"
|
|
DIFF="${DIFF:0:$MAX_CHARS}
|
|
|
|
[TRUNCATED - diff exceeded ${MAX_CHARS} characters]"
|
|
fi
|
|
|
|
# Prepare the prompt
|
|
PROMPT="You are a code quality evaluator. Analyze this git diff and provide a JSON evaluation.
|
|
|
|
Score each criterion from 0-10:
|
|
- correctness: Does the code work correctly?
|
|
- completeness: Is the implementation complete (no TODOs, stubs)?
|
|
- safety: No secrets, no security issues?
|
|
|
|
Respond ONLY with valid JSON in this format:
|
|
{
|
|
\"verdict\": \"APPROVE\" or \"NEEDS_REVIEW\" or \"REJECT\",
|
|
\"scores\": {\"correctness\": N, \"completeness\": N, \"safety\": N},
|
|
\"issues\": [{\"severity\": \"high/medium/low\", \"description\": \"...\"}],
|
|
\"summary\": \"One sentence summary\"
|
|
}
|
|
|
|
Rules:
|
|
- APPROVE if all scores >= $THRESHOLD and no high-severity issues
|
|
- NEEDS_REVIEW if any score is 5-$((THRESHOLD-1)) or medium issues exist
|
|
- REJECT if any score < 5 or high-severity security issues
|
|
|
|
Git diff to evaluate:
|
|
|
|
$DIFF"
|
|
|
|
# Call Claude (requires claude CLI to be installed and authenticated)
|
|
if ! command -v claude &> /dev/null; then
|
|
echo -e "${RED}Error: 'claude' CLI not found. Install Claude Code first.${NC}"
|
|
exit 1
|
|
fi
|
|
|
|
# Run evaluation
|
|
RESULT=$(echo "$PROMPT" | claude --model "$MODEL" --print 2>/dev/null) || {
|
|
echo -e "${RED}Error: Claude evaluation failed${NC}"
|
|
echo "You can bypass with: CLAUDE_SKIP_EVAL=1 git commit"
|
|
exit 1
|
|
}
|
|
|
|
# Extract JSON from response (handle potential markdown wrapping)
|
|
JSON_RESULT=$(echo "$RESULT" | grep -o '{.*}' | head -1)
|
|
|
|
if [[ -z "$JSON_RESULT" ]]; then
|
|
echo -e "${YELLOW}Warning: Could not parse evaluation result${NC}"
|
|
echo "Raw response: $RESULT"
|
|
echo ""
|
|
echo "Proceeding with commit (evaluation inconclusive)"
|
|
exit 0
|
|
fi
|
|
|
|
# Parse result
|
|
VERDICT=$(echo "$JSON_RESULT" | jq -r '.verdict // "UNKNOWN"')
|
|
CORRECTNESS=$(echo "$JSON_RESULT" | jq -r '.scores.correctness // 0')
|
|
COMPLETENESS=$(echo "$JSON_RESULT" | jq -r '.scores.completeness // 0')
|
|
SAFETY=$(echo "$JSON_RESULT" | jq -r '.scores.safety // 0')
|
|
SUMMARY=$(echo "$JSON_RESULT" | jq -r '.summary // "No summary"')
|
|
ISSUES=$(echo "$JSON_RESULT" | jq -r '.issues // []')
|
|
|
|
# Display results
|
|
echo ""
|
|
echo -e "${CYAN}═══════════════════════════════════════════════════════════${NC}"
|
|
echo -e "${CYAN} Evaluation Results${NC}"
|
|
echo -e "${CYAN}═══════════════════════════════════════════════════════════${NC}"
|
|
echo ""
|
|
echo " Correctness: $CORRECTNESS/10"
|
|
echo " Completeness: $COMPLETENESS/10"
|
|
echo " Safety: $SAFETY/10"
|
|
echo ""
|
|
echo " Summary: $SUMMARY"
|
|
echo ""
|
|
|
|
# Show issues if any
|
|
ISSUE_COUNT=$(echo "$ISSUES" | jq 'length')
|
|
if [[ "$ISSUE_COUNT" -gt 0 ]]; then
|
|
echo " Issues found:"
|
|
echo "$ISSUES" | jq -r '.[] | " [\(.severity | ascii_upcase)] \(.description)"'
|
|
echo ""
|
|
fi
|
|
|
|
# Handle verdict
|
|
case "$VERDICT" in
|
|
APPROVE)
|
|
echo -e "${GREEN}✓ APPROVED - Proceeding with commit${NC}"
|
|
echo ""
|
|
exit 0
|
|
;;
|
|
NEEDS_REVIEW)
|
|
echo -e "${YELLOW}⚠ NEEDS_REVIEW - Issues detected${NC}"
|
|
echo ""
|
|
echo "Options:"
|
|
echo " 1. Fix issues and try again"
|
|
echo " 2. Bypass: CLAUDE_SKIP_EVAL=1 git commit"
|
|
echo " 3. Skip hook: git commit --no-verify"
|
|
echo ""
|
|
exit 1
|
|
;;
|
|
REJECT)
|
|
echo -e "${RED}✗ REJECTED - Critical issues found${NC}"
|
|
echo ""
|
|
echo "Please fix the issues before committing."
|
|
echo "To force commit anyway: git commit --no-verify"
|
|
echo ""
|
|
exit 1
|
|
;;
|
|
*)
|
|
echo -e "${YELLOW}? Unknown verdict: $VERDICT${NC}"
|
|
echo "Proceeding with commit (evaluation inconclusive)"
|
|
exit 0
|
|
;;
|
|
esac
|