claude-code-ultimate-guide/scripts/generate-audit-report.py
Florian BRUNIAUX a55ff38143 feat(quiz): add complete audit system for 256 questions
Implements an automated pipeline for quiz question quality control:

**Phase 1: Context Extraction**
- Script: extract-audit-context.py
- Resolves doc_reference anchors to guide sections (97.3% success)
- Multi-file support (ultimate-guide.md, learning-with-ai.md, etc.)
- Fuzzy matching + substring fallback
- Output: audit-context.json (256 questions + context)

**Phase 2: Batch Generation**
- Script: generate-audit-batches.py
- 16 prioritized review batches by category
- Advanced Patterns split into 2 batches (29 questions)
- Embedded review instructions in each batch
- Output: audit-batches/*.md (16,559 lines)

**Phase 3: Report Compilation**
- Script: generate-audit-report.py
- Parses agent review outputs (PASS/ISSUE format)
- Aggregates by severity (critical/warning/info)
- Output: audit-report.md

**Validation:**
- Q01-001 error found immediately (curl vs npm contradiction)
- System is working as designed

**Documentation:**
- AUDIT-WORKFLOW.md (complete 5-phase guide)
- AUDIT-SYSTEM-SUMMARY.md (architecture + metrics)
- IMPLEMENTATION-COMPLETE.md (status + validation)
- DEMO-REVIEW-OUTPUT.txt (example review)

**Next Steps:** Manual agent reviews (16 batches, ~2-3 hours)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-04 16:45:47 +01:00

250 lines
8.6 KiB
Python
Executable file

#!/usr/bin/env python3
"""
Generate audit report from agent reviews.
Collects agent review outputs and compiles them into a comprehensive report.
Input: claudedocs/audit-reviews/*.txt (agent outputs)
Output: claudedocs/audit-report.md
Usage:
python3 scripts/generate-audit-report.py
"""
import re
import sys
from collections import defaultdict
from datetime import date
from pathlib import Path
from typing import Dict, List, Optional, Tuple
# Parent of the directory that contains this script (the project root when
# the script is run as scripts/generate-audit-report.py).
BASE_DIR = Path(__file__).parent.parent
# Directory scanned for agent review outputs (*.txt files).
REVIEWS_DIR = BASE_DIR / "claudedocs" / "audit-reviews"
# Destination file for the compiled Markdown report.
OUTPUT_REPORT = BASE_DIR / "claudedocs" / "audit-report.md"
def parse_review_file(filepath: Path) -> Dict:
    """
    Parse one agent review output file into pass and issue records.

    Expected line formats:
        PASS: Q01-001
        ISSUE: Q01-002 - [critical] CORRECT_ANSWER - Description
        ISSUE: Q01-003 - [warning] AMBIGUITY - Description

    Lines that start with neither ``PASS:`` nor ``ISSUE:`` are ignored.
    ISSUE lines that do not match the expected format are reported on
    stderr and skipped, so a formatting slip in a review is visible
    instead of silently vanishing from the report.

    Args:
        filepath: Review text file to read (decoded as UTF-8).

    Returns:
        A dict with two keys:
            'pass'   - list of question IDs taken from PASS lines
            'issues' - list of dicts with keys 'q_id', 'severity', 'type'
                       and 'description', one per well-formed ISSUE line
        Severity is lower-cased, so ``[Critical]`` and ``[critical]`` are
        recorded identically.
    """
    pass_prefix = 'PASS:'
    issue_prefix = 'ISSUE:'
    issue_pattern = re.compile(
        r'ISSUE:\s+(Q\d+-\d+)\s+-\s+\[(\w+)\]\s+(\w+)\s+-\s+(.+)'
    )

    # Explicit encoding: the platform default is not UTF-8 everywhere.
    content = filepath.read_text(encoding='utf-8')

    results = {
        'pass': [],
        'issues': []
    }
    for line_no, raw_line in enumerate(content.split('\n'), start=1):
        line = raw_line.strip()
        if not line:
            continue

        if line.startswith(pass_prefix):
            # Drop only the leading marker; a bare "PASS:" carries no ID.
            q_id = line[len(pass_prefix):].strip()
            if q_id:
                results['pass'].append(q_id)
        elif line.startswith(issue_prefix):
            match = issue_pattern.match(line)
            if match is None:
                print(
                    f"Warning: {filepath.name}:{line_no}: "
                    f"unrecognized ISSUE line skipped: {line}",
                    file=sys.stderr,
                )
                continue
            q_id, severity, issue_type, description = match.groups()
            results['issues'].append({
                'q_id': q_id,
                # Normalized so comparisons against lower-case severity
                # names do not miss "[Critical]" or "[WARNING]".
                'severity': severity.lower(),
                'type': issue_type,
                'description': description
            })
    return results
def _percent(part: int, whole: int) -> float:
    """Return part/whole as a percentage, or 0.0 when whole is zero."""
    return part / whole * 100 if whole else 0.0


def generate_report(all_reviews: List[Dict], generated_on: Optional[str] = None) -> str:
    """
    Generate the comprehensive Markdown audit report.

    Args:
        all_reviews: One dict per review file, each with a 'pass' list of
            question IDs and an 'issues' list of dicts carrying 'q_id',
            'severity', 'type' and 'description'.
        generated_on: Text shown after "**Generated**:". Defaults to
            today's date in ISO format.

    Returns:
        The report as a single newline-joined string with these sections:
        executive summary, critical issues, warnings grouped by type,
        info issues, pass rate per category, and recommended actions.

    Notes:
        "Total Questions Reviewed" is the number of PASS entries plus the
        number of ISSUE entries. Only issues whose severity is exactly
        'critical', 'warning' or 'info' are listed in a section. An empty
        input produces a report with zero counts and 0.0% rates.
    """
    if generated_on is None:
        generated_on = date.today().isoformat()

    # Flatten once, then bucket by severity (input order is preserved
    # inside each bucket so the stable sorts below behave predictably).
    passed_ids = [q_id for review in all_reviews for q_id in review['pass']]
    all_issues = [issue for review in all_reviews for issue in review['issues']]
    by_severity = defaultdict(list)
    for issue in all_issues:
        by_severity[issue['severity']].append(issue)

    def sorted_by_id(issues):
        return sorted(issues, key=lambda entry: entry['q_id'])

    critical_issues = by_severity.get('critical', [])
    warning_issues = by_severity.get('warning', [])
    info_issues = by_severity.get('info', [])

    total_pass = len(passed_ids)
    total_issues = len(all_issues)
    total_questions = total_pass + total_issues

    # Header and executive summary
    lines = [
        "# Quiz Question Audit Report",
        "",
        f"**Generated**: {generated_on}",
        "",
        "---",
        "",
        "## Executive Summary",
        "",
        f"**Total Questions Reviewed**: {total_questions}",
        f"**Pass**: {total_pass} ({_percent(total_pass, total_questions):.1f}%)",
        f"**Issues Found**: {total_issues} ({_percent(total_issues, total_questions):.1f}%)",
        "",
        "### Issue Breakdown",
        "",
        f"- **Critical**: {len(critical_issues)} (wrong answer, major factual error)",
        f"- **Warning**: {len(warning_issues)} (ambiguous, outdated, misleading)",
        f"- **Info**: {len(info_issues)} (minor wording, trivial)",
        "",
        "---",
        "",
    ]

    # Critical issues: one sub-heading per issue
    lines.append("## Critical Issues (Immediate Fix Required)")
    lines.append("")
    if critical_issues:
        for issue in sorted_by_id(critical_issues):
            lines.append(f"### {issue['q_id']}")
            lines.append("")
            lines.append(f"**Type**: {issue['type']}")
            lines.append(f"**Issue**: {issue['description']}")
            lines.append("")
    else:
        lines.append("*No critical issues found.*")
        lines.append("")
    lines.append("---")
    lines.append("")

    # Warnings: grouped by issue type, types in alphabetical order
    lines.append("## Warnings (Review & Consider Fixing)")
    lines.append("")
    if warning_issues:
        by_type = defaultdict(list)
        for issue in warning_issues:
            by_type[issue['type']].append(issue)
        for issue_type in sorted(by_type):
            group = by_type[issue_type]
            lines.append(f"### {issue_type} ({len(group)} questions)")
            lines.append("")
            for issue in sorted_by_id(group):
                lines.append(f"- **{issue['q_id']}**: {issue['description']}")
            lines.append("")
    else:
        lines.append("*No warnings found.*")
        lines.append("")
    lines.append("---")
    lines.append("")

    # Info: flat bullet list
    lines.append("## Info (Minor Issues)")
    lines.append("")
    if info_issues:
        for issue in sorted_by_id(info_issues):
            lines.append(f"- **{issue['q_id']}** ({issue['type']}): {issue['description']}")
        lines.append("")
    else:
        lines.append("*No info issues found.*")
        lines.append("")
    lines.append("---")
    lines.append("")

    # Health by category: the category is the part of the ID before the
    # first '-' (e.g. "Q01" for "Q01-001").
    lines.append("## Health by Category")
    lines.append("")
    lines.append("| Category | Pass | Issues | Pass Rate |")
    lines.append("|----------|------|--------|-----------|")
    by_category = defaultdict(lambda: {'pass': 0, 'issues': 0})
    for q_id in passed_ids:
        by_category[q_id.split('-')[0]]['pass'] += 1
    for issue in all_issues:
        by_category[issue['q_id'].split('-')[0]]['issues'] += 1
    for cat in sorted(by_category):
        stats = by_category[cat]
        pass_rate = _percent(stats['pass'], stats['pass'] + stats['issues'])
        lines.append(f"| Category {cat} | {stats['pass']} | {stats['issues']} | {pass_rate:.1f}% |")
    lines.append("")
    lines.append("---")
    lines.append("")

    # Next steps
    lines.extend([
        "## Recommended Actions",
        "",
        "1. **Fix Critical Issues** (Priority 1)",
        " - Review each critical issue",
        " - Fix question/answer or update explanation",
        " - Rebuild: `python3 scripts/build-questions.py`",
        "",
        "2. **Review Warnings** (Priority 2)",
        " - Evaluate ambiguities and outdated info",
        " - Decide: fix, clarify, or accept",
        "",
        "3. **Consider Info Issues** (Priority 3)",
        " - Minor improvements for quality",
        "",
    ])
    return '\n'.join(lines)
def main():
    """
    Compile every review file in REVIEWS_DIR into the Markdown audit report.

    Progress is printed to stdout and errors to stderr. The report is
    written to OUTPUT_REPORT as UTF-8.

    Returns:
        0 on success; 1 when the reviews directory is missing, contains no
        *.txt files, or the files yield no PASS/ISSUE entries at all.
    """
    print("═══════════════════════════════════════════════════════════════")
    print("Quiz Question Audit Report Generation")
    print("═══════════════════════════════════════════════════════════════")
    print()

    if not REVIEWS_DIR.is_dir():
        print(f"Error: Reviews directory not found: {REVIEWS_DIR}", file=sys.stderr)
        print("Place agent review outputs in claudedocs/audit-reviews/*.txt", file=sys.stderr)
        return 1

    # Find review files (sorted so the processing order is reproducible)
    review_files = sorted(REVIEWS_DIR.glob('*.txt'))
    if not review_files:
        print(f"Error: No review files found in {REVIEWS_DIR}", file=sys.stderr)
        return 1
    print(f"Found {len(review_files)} review files")
    print()

    # Parse all reviews
    all_reviews = []
    for filepath in review_files:
        print(f"Parsing {filepath.name}...")
        review = parse_review_file(filepath)
        all_reviews.append(review)
        print(f" Pass: {len(review['pass'])}, Issues: {len(review['issues'])}")
    print()

    # A report over zero entries has no meaningful pass rate; stop with a
    # clear message rather than failing during report generation.
    if not any(review['pass'] or review['issues'] for review in all_reviews):
        print(f"Error: No PASS/ISSUE entries found in {REVIEWS_DIR}", file=sys.stderr)
        return 1

    # Generate report
    report = generate_report(all_reviews)

    # Write output; explicit UTF-8 because issue descriptions may contain
    # characters the platform default encoding cannot represent.
    OUTPUT_REPORT.write_text(report, encoding='utf-8')

    print("═══════════════════════════════════════════════════════════════")
    print(f"✓ Report generated: {OUTPUT_REPORT}")
    print("═══════════════════════════════════════════════════════════════")
    return 0
if __name__ == '__main__':
    # Run as a script: hand main()'s return value to the interpreter as the
    # process exit status.
    import sys

    exit_status = main()
    sys.exit(exit_status)