feat(quiz): add complete audit system for 256 questions

Implements automated pipeline for quiz question quality control:

**Phase 1: Context Extraction**
- Script: extract-audit-context.py
- Resolves doc_reference anchors to guide sections (97.3% success)
- Multi-file support (ultimate-guide.md, learning-with-ai.md, etc.)
- Fuzzy matching + substring fallback
- Output: audit-context.json (256 questions + context)

**Phase 2: Batch Generation**
- Script: generate-audit-batches.py
- 16 prioritized review batches by category
- Advanced Patterns split into 2 batches (29 questions)
- Embedded review instructions in each batch
- Output: audit-batches/*.md (16,559 lines)

**Phase 3: Report Compilation**
- Script: generate-audit-report.py
- Parses agent review outputs (PASS/ISSUE format)
- Aggregates by severity (critical/warning/info)
- Output: audit-report.md

**Validation:**
- Q01-001 error found immediately (curl vs npm contradiction)
- System working as designed ✅

**Documentation:**
- AUDIT-WORKFLOW.md (complete 5-phase guide)
- AUDIT-SYSTEM-SUMMARY.md (architecture + metrics)
- IMPLEMENTATION-COMPLETE.md (status + validation)
- DEMO-REVIEW-OUTPUT.txt (example review)

**Next Steps:** Manual agent reviews (16 batches, ~2-3 hours)

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
2026-02-04 16:45:47 +01:00 · 2026-02-04 16:45:47 +01:00 · a55ff38143
commit a55ff38143
parent 1c27aa293d
3 changed files with 921 additions and 0 deletions
--- a/scripts/extract-audit-context.py
+++ b/scripts/extract-audit-context.py
@ -0,0 +1,443 @@
+#!/usr/bin/env python3
+"""
+Extract audit context for quiz questions.
+
+Reads 256 quiz questions from landing repo, resolves their doc_reference anchors
+to sections in the guide, and extracts relevant context (max 150 lines per question).
+
+Output: claudedocs/audit-context.json
+
+Strategies for resolving doc_reference.anchor (in order):
+  A. Anchor matching: Convert anchor to markdown heading and search
+  B. Section name matching: Fuzzy match on doc_reference.section
+  C. reference.yaml fallback: Use line numbers from index (not yet implemented; currently skipped)
+  D. UNRESOLVED: Flag if no match found
+
+Usage:
+    python3 scripts/extract-audit-context.py
+
+Requirements:
+    - pyyaml (pip install pyyaml)
+    - thefuzz (pip install thefuzz)
+"""
+
+import json
+import re
+import sys
+from pathlib import Path
+from typing import Dict, List, Tuple, Optional
+import yaml
+
+try:
+    from thefuzz import fuzz
+except ImportError:
+    print("Error: thefuzz not installed. Run: pip install thefuzz", file=sys.stderr)
+    sys.exit(1)
+
+
+# ═══════════════════════════════════════════════════════════════
+# Configuration
+# ═══════════════════════════════════════════════════════════════
+
+# Directory two levels above this script file (the parent of the script's directory).
+BASE_DIR = Path(__file__).parent.parent
+# Landing repo checkout, expected as a sibling directory of BASE_DIR.
+LANDING_DIR = Path(__file__).parent.parent.parent / "claude-code-ultimate-guide-landing"
+# Question files are globbed from here as '*/*.md'.
+QUESTIONS_DIR = LANDING_DIR / "questions"
+# Optional index file; load_reference_yaml() returns {} when it is missing.
+REFERENCE_YAML = BASE_DIR / "machine-readable" / "reference.yaml"
+# Destination of the generated audit context JSON.
+OUTPUT_JSON = BASE_DIR / "claudedocs" / "audit-context.json"
+
+CONTEXT_LINES = 150  # Max lines of guide context per question
+
+# Cache for loaded guide files, keyed by the relative path passed to load_guide()
+_GUIDE_CACHE = {}
+
+
+# ═══════════════════════════════════════════════════════════════
+# Parsing Utilities (reuse from build-questions.py)
+# ═══════════════════════════════════════════════════════════════
+
+def parse_frontmatter(content: str) -> Tuple[Dict, str]:
+    """
+    Split Markdown content into its YAML frontmatter and the remaining body.
+
+    The content must open with a ``---`` line. The frontmatter runs up to the
+    next ``---`` line, and everything after that line is the body.
+
+    Args:
+        content: Full text of a Markdown file.
+
+    Returns:
+        Tuple of (frontmatter mapping, body text).
+
+    Raises:
+        ValueError: If the opening or closing ``---`` is missing, the YAML
+            cannot be parsed, or the frontmatter is not a mapping.
+    """
+    lines = content.split('\n')
+
+    if lines[0].strip() != '---':
+        raise ValueError("File must start with YAML frontmatter (---)")
+
+    # Index of the first '---' after the opening delimiter, if any.
+    closing_idx = next(
+        (idx for idx in range(1, len(lines)) if lines[idx].strip() == '---'),
+        None,
+    )
+    if closing_idx is None:
+        raise ValueError("Invalid frontmatter structure (missing closing ---)")
+
+    yaml_text = '\n'.join(lines[1:closing_idx])
+    body_text = '\n'.join(lines[closing_idx + 1:])
+
+    try:
+        frontmatter = yaml.safe_load(yaml_text)
+    except yaml.YAMLError as e:
+        # Chain the parser error so the original traceback is preserved.
+        raise ValueError(f"Invalid YAML frontmatter: {e}") from e
+
+    # safe_load returns None for an empty document and may return a scalar or
+    # list; callers index the result by key, so reject anything but a mapping.
+    if not isinstance(frontmatter, dict):
+        raise ValueError("YAML frontmatter must be a mapping of keys to values")
+
+    return frontmatter, body_text
+
+
+def split_body(body: str) -> Tuple[str, str]:
+    """
+    Separate a question body into the question text and its explanation.
+
+    The split happens at the first line that is exactly ``---`` (ignoring
+    surrounding whitespace) and is not inside a fenced code block. Fence state
+    flips on every line whose stripped text starts with three backticks.
+
+    Returns:
+        Tuple of (question, explanation), each stripped of outer whitespace.
+
+    Raises:
+        ValueError: If no separator line is found outside code blocks.
+    """
+    rows = body.split('\n')
+    fenced = False
+
+    for pos, row in enumerate(rows):
+        text = row.strip()
+
+        if text.startswith('```'):
+            fenced = not fenced
+        elif text == '---' and not fenced:
+            before = '\n'.join(rows[:pos]).strip()
+            after = '\n'.join(rows[pos + 1:]).strip()
+            return before, after
+
+    raise ValueError("Body must contain --- separator between question and explanation")
+
+
+# ═══════════════════════════════════════════════════════════════
+# Guide Context Resolution
+# ═══════════════════════════════════════════════════════════════
+
+def load_guide(guide_file: str = "guide/ultimate-guide.md") -> List[str]:
+    """
+    Load guide lines from specified file, caching the result per path.
+
+    Args:
+        guide_file: Relative path from BASE_DIR (e.g., "guide/ultimate-guide.md")
+
+    Returns:
+        List of lines (the file text split on '\\n'). The same list object is
+        returned on repeated calls for the same path.
+
+    Raises:
+        FileNotFoundError: If the resolved path does not exist.
+    """
+    if guide_file in _GUIDE_CACHE:
+        return _GUIDE_CACHE[guide_file]
+
+    guide_path = BASE_DIR / guide_file
+    if not guide_path.exists():
+        raise FileNotFoundError(f"Guide file not found: {guide_path}")
+
+    # Decode explicitly instead of relying on the locale default, which differs
+    # between platforms. NOTE(review): assumes the guides are UTF-8 - confirm.
+    lines = guide_path.read_text(encoding="utf-8").split('\n')
+    _GUIDE_CACHE[guide_file] = lines
+    return lines
+
+
+def load_reference_yaml() -> Dict:
+    """
+    Load reference.yaml for fallback line numbers.
+
+    Returns:
+        The parsed YAML content, or an empty dict when the file is missing
+        or empty.
+    """
+    if not REFERENCE_YAML.exists():
+        return {}
+    # safe_load yields None for an empty document; normalise that to {} so the
+    # declared return type holds. Read as UTF-8 rather than the locale default.
+    return yaml.safe_load(REFERENCE_YAML.read_text(encoding="utf-8")) or {}
+
+
+def anchor_to_heading(anchor: str) -> str:
+    """
+    Normalise a doc_reference anchor into lowercase heading words.
+
+    Steps: drop leading '#' characters, lowercase, remove one leading
+    "<digits>-" prefix, turn every '-' into a space, strip outer whitespace.
+
+    Examples:
+    - '#11-installation' → 'installation'
+    - '#core-concepts' → 'core concepts'
+    - '#32-common-tasks' → 'common tasks'
+    """
+    slug = anchor.lstrip('#').lower()
+    without_number = re.sub(r'^\d+-', '', slug)
+    return without_number.replace('-', ' ').strip()
+
+
+def find_heading_in_guide(guide_lines: List[str], target_heading: str) -> Optional[int]:
+    """
+    Find line number of heading in guide (fuzzy match, threshold 70).
+
+    Only lines starting with '#' are considered. Each candidate is lowercased
+    and stripped of its '#' markers and a leading section number ('1.1', '3').
+
+    Matching strategy:
+    - The first heading where target and heading text contain one another
+      (case-insensitive substring) wins immediately.
+    - Otherwise the heading with the highest fuzz.ratio score is returned,
+      provided that score is at least 70.
+
+    Headings whose normalised text is empty, and an empty target, never match:
+    the empty string is a substring of everything and would otherwise
+    produce a false hit.
+
+    Args:
+        guide_lines: Guide content, one entry per line.
+        target_heading: Heading text to look for.
+
+    Returns:
+        Line number (0-indexed) or None if not found
+    """
+    target_lower = target_heading.lower()
+    if not target_lower.strip():
+        return None
+
+    best_score = 0
+    best_line = None
+
+    for idx, line in enumerate(guide_lines):
+        if not line.startswith('#'):
+            continue
+
+        # Extract heading text (remove #, ##, etc.)
+        heading_text = re.sub(r'^#+\s*', '', line).lower()
+        # Remove leading numbers like '1.1', '3.2', etc.
+        heading_text = re.sub(r'^\d+\.?\d*\s*', '', heading_text)
+
+        # A bare '#' or a number-only heading leaves nothing to compare.
+        if not heading_text.strip():
+            continue
+
+        # Strategy 1: Substring match (exact)
+        if target_lower in heading_text or heading_text in target_lower:
+            return idx
+
+        # Strategy 2: Fuzzy match
+        score = fuzz.ratio(target_lower, heading_text)
+        if score > best_score:
+            best_score = score
+            best_line = idx
+
+    # Lowered threshold to 70 to catch more variations
+    if best_score >= 70:
+        return best_line
+    return None
+
+
+def extract_section_context(guide_lines: List[str], start_line: int, max_lines: int = CONTEXT_LINES) -> str:
+    """
+    Extract context from guide starting at start_line.
+    Stops at next heading of same/higher level or after max_lines.
+
+    Lines inside fenced code blocks (delimited by lines starting with three
+    backticks) are never treated as headings, so a '# comment' in a shell
+    snippet does not end the section early.
+
+    Args:
+        guide_lines: Full guide lines
+        start_line: Starting line number (0-indexed)
+        max_lines: Maximum lines to extract
+
+    Returns:
+        Context text (lines joined with '\\n'); empty string when start_line
+        is past the end of the guide.
+    """
+    if start_line >= len(guide_lines):
+        return ""
+
+    # Determine heading level of start line (0 when it is not a heading, in
+    # which case no later heading can terminate the section).
+    start_match = re.match(r'^#+', guide_lines[start_line])
+    start_level = len(start_match.group()) if start_match else 0
+
+    context_lines = []
+    in_code_block = False
+    for offset in range(max_lines):
+        line_idx = start_line + offset
+        if line_idx >= len(guide_lines):
+            break
+
+        line = guide_lines[line_idx]
+
+        if line.strip().startswith('```'):
+            in_code_block = not in_code_block
+        elif offset > 0 and not in_code_block and line.startswith('#'):
+            # Stop at next heading of same/higher level (but not the start heading itself)
+            heading_level = len(re.match(r'^#+', line).group())
+            if heading_level <= start_level:
+                break
+
+        context_lines.append(line)
+
+    return '\n'.join(context_lines)
+
+
+def resolve_doc_reference(doc_ref: Dict, reference_yaml: Dict) -> Dict:
+    """
+    Look up the guide section a doc_reference points to and return its text.
+
+    Lookups are attempted in this order, and the first hit is returned:
+      A. 'anchor'  - anchor converted to heading words, confidence 95
+      B. 'section' - lowercased section name, confidence 80
+      C. reference.yaml line numbers - not implemented yet
+      D. otherwise the result stays 'unresolved'
+
+    Args:
+        doc_ref: Mapping with optional 'file', 'anchor' and 'section' keys.
+        reference_yaml: Parsed reference.yaml (currently unused).
+
+    Returns:
+        {
+            'strategy': 'anchor|section|reference_yaml|unresolved|file_not_found',
+            'context': 'extracted guide text or empty',
+            'line_number': int or None,
+            'confidence': int (0-100),
+            'source_file': str (actual file searched)
+        }
+    """
+    guide_file = doc_ref.get('file', 'guide/ultimate-guide.md')
+    result = {
+        'strategy': 'unresolved',
+        'context': '',
+        'line_number': None,
+        'confidence': 0,
+        'source_file': guide_file,
+    }
+
+    try:
+        guide_lines = load_guide(guide_file)
+    except FileNotFoundError:
+        result['strategy'] = 'file_not_found'
+        return result
+
+    # (strategy name, raw field value, normaliser, confidence), tried in order.
+    # Normalisers run lazily so a later field is untouched once an earlier one hits.
+    candidates = (
+        ('anchor', doc_ref.get('anchor'), anchor_to_heading, 95),
+        ('section', doc_ref.get('section'), lambda text: text.lower(), 80),
+    )
+    for strategy, raw_value, normalise, confidence in candidates:
+        if not raw_value:
+            continue
+
+        line_num = find_heading_in_guide(guide_lines, normalise(raw_value))
+        if line_num is None:
+            continue
+
+        result['strategy'] = strategy
+        result['line_number'] = line_num
+        result['context'] = extract_section_context(guide_lines, line_num)
+        result['confidence'] = confidence
+        return result
+
+    # Strategy C: reference.yaml fallback
+    # TODO: Implement if anchor/section strategies fail too often
+    # For now, skip since reference.yaml has complex structure
+
+    # Strategy D: UNRESOLVED
+    return result
+
+
+# ═══════════════════════════════════════════════════════════════
+# Main Processing
+# ═══════════════════════════════════════════════════════════════
+
+def process_questions() -> List[Dict]:
+    """
+    Process all quiz questions and extract audit context.
+
+    Reads every '*/*.md' file under QUESTIONS_DIR, parses its frontmatter and
+    body, resolves 'doc_reference' (when present and not null) to guide
+    context, and prints resolution statistics. A file that fails to process is
+    reported on stderr and skipped so one bad question does not abort the run.
+
+    Returns:
+        List of question dicts, one per successfully processed file.
+
+    Exits:
+        Calls sys.exit(1) when QUESTIONS_DIR is missing or holds no .md files.
+    """
+    if not QUESTIONS_DIR.exists():
+        print(f"Error: Questions directory not found: {QUESTIONS_DIR}", file=sys.stderr)
+        sys.exit(1)
+
+    # Load reference
+    print("Loading reference.yaml...")
+    reference_yaml = load_reference_yaml()
+
+    # Find all question files
+    md_files = sorted(QUESTIONS_DIR.glob('*/*.md'))
+    if not md_files:
+        print(f"Error: No .md files found in {QUESTIONS_DIR}", file=sys.stderr)
+        sys.exit(1)
+
+    print(f"Found {len(md_files)} question files")
+    print()
+
+    # Process each question
+    results = []
+    stats = {
+        'total': len(md_files),
+        'anchor': 0,
+        'section': 0,
+        'reference_yaml': 0,
+        'unresolved': 0,
+        'no_reference': 0,
+        'file_not_found': 0
+    }
+
+    for idx, filepath in enumerate(md_files, 1):
+        try:
+            # Decode explicitly; the locale default differs between platforms.
+            content = filepath.read_text(encoding="utf-8")
+            frontmatter, body = parse_frontmatter(content)
+            question_text, explanation_text = split_body(body)
+
+            # Build question object
+            question_obj = {
+                'id': frontmatter['id'],
+                'category_id': frontmatter['category_id'],
+                'difficulty': frontmatter['difficulty'],
+                'profiles': frontmatter['profiles'],
+                'question': question_text,
+                'options': frontmatter['options'],
+                'correct': frontmatter['correct'],
+                'explanation': explanation_text,
+                'source_file': str(filepath.relative_to(QUESTIONS_DIR.parent))
+            }
+
+            # Resolve doc_reference if present. A key with a null value
+            # ("doc_reference:" left empty) is treated like a missing key
+            # instead of failing the whole question.
+            doc_ref = frontmatter.get('doc_reference')
+            if doc_ref is not None:
+                resolution = resolve_doc_reference(doc_ref, reference_yaml)
+
+                question_obj['doc_reference'] = doc_ref
+                question_obj['guide_context'] = resolution['context']
+                question_obj['resolution_strategy'] = resolution['strategy']
+                question_obj['resolution_confidence'] = resolution['confidence']
+                question_obj['guide_line_number'] = resolution['line_number']
+                question_obj['guide_source_file'] = resolution['source_file']
+
+                stats[resolution['strategy']] += 1
+            else:
+                question_obj['guide_context'] = ''
+                question_obj['resolution_strategy'] = 'no_reference'
+                stats['no_reference'] += 1
+
+            results.append(question_obj)
+
+            # Progress indicator
+            if idx % 25 == 0:
+                print(f"Processed {idx}/{len(md_files)} questions...")
+
+        except Exception as e:
+            # Deliberate best-effort: report the file and keep going.
+            print(f"Error processing {filepath.name}: {e}", file=sys.stderr)
+            continue
+
+    def share(key: str) -> str:
+        """Format a counter as 'N (P%)' relative to the total file count."""
+        return f"{stats[key]} ({stats[key]/stats['total']*100:.1f}%)"
+
+    print()
+    print("═══════════════════════════════════════════════════════════════")
+    print("Resolution Statistics")
+    print("═══════════════════════════════════════════════════════════════")
+    print(f"Total questions:        {stats['total']}")
+    print(f"Anchor strategy:        {share('anchor')}")
+    print(f"Section strategy:       {share('section')}")
+    print(f"reference.yaml:         {share('reference_yaml')}")
+    print(f"No doc_reference:       {share('no_reference')}")
+    print(f"File not found:         {share('file_not_found')}")
+    print(f"UNRESOLVED:             {share('unresolved')}")
+    print()
+
+    # Rate is measured against questions that actually carry a reference.
+    resolved_count = stats['anchor'] + stats['section'] + stats['reference_yaml']
+    with_reference = stats['total'] - stats['no_reference']
+    resolution_rate = resolved_count / with_reference * 100 if with_reference > 0 else 0
+    print(f"Resolution rate (excl. no_reference): {resolution_rate:.1f}%")
+
+    if resolution_rate < 95:
+        print()
+        print("⚠️  WARNING: Resolution rate < 95%. Consider improving strategies.")
+
+    return results
+
+
+def main():
+    """
+    Main entry point.
+
+    Runs process_questions(), then writes the results as JSON to OUTPUT_JSON
+    (creating its parent directory if needed) and prints a summary.
+    """
+    # Function-scope import so this block adds no module-level dependency.
+    from datetime import date
+
+    print("═══════════════════════════════════════════════════════════════")
+    print("Quiz Question Audit Context Extraction")
+    print("═══════════════════════════════════════════════════════════════")
+    print()
+
+    # Process questions
+    questions_with_context = process_questions()
+
+    # Ensure output directory exists
+    OUTPUT_JSON.parent.mkdir(parents=True, exist_ok=True)
+
+    # Write output
+    output_data = {
+        'version': '1.0',
+        # Stamp the actual run date rather than a fixed literal.
+        'generated_at': date.today().isoformat(),
+        'total_questions': len(questions_with_context),
+        'questions': questions_with_context
+    }
+
+    # ensure_ascii=False emits raw non-ASCII characters, so the file encoding
+    # must be pinned instead of left to the locale default.
+    OUTPUT_JSON.write_text(
+        json.dumps(output_data, indent=2, ensure_ascii=False) + '\n',
+        encoding="utf-8",
+    )
+
+    print()
+    print("═══════════════════════════════════════════════════════════════")
+    print(f"✓ Output written to: {OUTPUT_JSON}")
+    print(f"  Total questions: {len(questions_with_context)}")
+    print("═══════════════════════════════════════════════════════════════")
+
+
+if __name__ == '__main__':
+    main()
--- a/scripts/generate-audit-batches.py
+++ b/scripts/generate-audit-batches.py
@ -0,0 +1,228 @@
+#!/usr/bin/env python3
+"""
+Generate audit batches for quiz question review.
+
+Splits 256 questions into category-based batches for agent review.
+Each batch includes: question, options, correct answer, explanation, and guide context.
+
+Output: claudedocs/audit-batches/*.md (16 files)
+
+Usage:
+    python3 scripts/generate-audit-batches.py
+"""
+
+import json
+from pathlib import Path
+from typing import Dict, List
+
+
+# Directory two levels above this script file (the parent of the script's directory).
+BASE_DIR = Path(__file__).parent.parent
+# Input: JSON produced by extract-audit-context.py.
+AUDIT_CONTEXT = BASE_DIR / "claudedocs" / "audit-context.json"
+# Template text; its '{questions}' placeholder is replaced with the formatted questions.
+BATCH_TEMPLATE = BASE_DIR / "claudedocs" / "audit-batch-template.md"
+# Output directory for the generated batch files.
+OUTPUT_DIR = BASE_DIR / "claudedocs" / "audit-batches"
+
+# Category names (from _categories.yaml in landing repo)
+# Keys are the category_id values carried by each question.
+CATEGORIES = {
+    1: "Quick Start",
+    2: "Core Concepts",
+    3: "Best Practices",
+    4: "Configuration",
+    5: "Context Management",
+    6: "Tools & Features",
+    7: "Workflows",
+    8: "MCP Ecosystem",
+    9: "Advanced Patterns",
+    10: "Reference",
+    11: "Learning with AI",
+    12: "Methodologies",
+    13: "Security",
+    14: "Philosophy",
+    15: "Ecosystem"
+}
+
+# Priority order for review
+# Batches are generated in this order; a category's 1-based position in this
+# list is reported as its priority in the batch header.
+PRIORITY_ORDER = [
+    1,   # Quick Start
+    2,   # Core Concepts
+    13,  # Security
+    10,  # Reference
+    8,   # MCP Ecosystem
+    9,   # Advanced Patterns
+    3,   # Best Practices
+    5,   # Context Management
+    6,   # Tools & Features
+    7,   # Workflows
+    11,  # Learning with AI
+    12,  # Methodologies
+    4,   # Configuration
+    14,  # Philosophy
+    15   # Ecosystem
+]
+
+
+def format_question_for_review(q: Dict) -> str:
+    """
+    Format a single question for human review.
+
+    Args:
+        q: Question dict with 'id', 'difficulty', 'profiles', 'question',
+            'options' (keys 'a'-'d'), 'correct' and 'explanation'. Optional
+            keys: 'guide_context', 'guide_source_file', 'guide_line_number',
+            'resolution_strategy', 'resolution_confidence', 'doc_reference'.
+
+    Returns:
+        Markdown text for the question, ending with a '---' separator. Guide
+        context is shown in a fenced block capped at 100 lines; when there is
+        no context the question is flagged as UNRESOLVED.
+    """
+    lines = []
+    lines.append(f"### Question {q['id']}")
+    lines.append("")
+    lines.append(f"**Difficulty**: {q['difficulty']}")
+    lines.append(f"**Profiles**: {', '.join(q['profiles'])}")
+    lines.append("")
+    lines.append("**Question:**")
+    lines.append(q['question'])
+    lines.append("")
+    lines.append("**Options:**")
+    for key in ['a', 'b', 'c', 'd']:
+        marker = "✓" if key == q['correct'] else " "
+        lines.append(f"  {key}. {q['options'][key]} {marker}")
+    lines.append("")
+    lines.append(f"**Correct Answer**: {q['correct']}")
+    lines.append("")
+    lines.append("**Explanation:**")
+    lines.append(q['explanation'])
+    lines.append("")
+
+    # Guide context
+    context = q.get('guide_context')
+    if context:
+        # The context is Markdown and may contain its own ``` fences, which
+        # would close a three-backtick wrapper early. Grow the wrapper until
+        # it is longer than any backtick run in the context.
+        fence = "```"
+        while fence in context:
+            fence += "`"
+
+        lines.append("**Guide Context:**")
+        lines.append(f"*Source: {q.get('guide_source_file', 'N/A')} (line {q.get('guide_line_number', 'N/A')})*")
+        lines.append(f"*Resolution: {q.get('resolution_strategy')} (confidence: {q.get('resolution_confidence', 0)}%)*")
+        lines.append(fence)
+        # Truncate context if too long (max 100 lines)
+        context_lines = context.split('\n')
+        if len(context_lines) > 100:
+            lines.extend(context_lines[:100])
+            lines.append(f"... (truncated {len(context_lines) - 100} lines)")
+        else:
+            lines.extend(context_lines)
+        lines.append(fence)
+    else:
+        lines.append("**Guide Context:** ⚠️ UNRESOLVED")
+        if 'doc_reference' in q:
+            lines.append(f"*Intended reference: {q['doc_reference']}*")
+
+    lines.append("")
+    lines.append("---")
+    lines.append("")
+    return '\n'.join(lines)
+
+
+def generate_batch(category_id: int, questions: List[Dict], template: str) -> str:
+    """
+    Build the Markdown text of one review batch.
+
+    The formatted questions replace the '{questions}' placeholder in the
+    template, and a header with the category name, question count and the
+    category's position in PRIORITY_ORDER is put in front.
+
+    Raises:
+        KeyError: If category_id is not in CATEGORIES.
+        ValueError: If category_id is not in PRIORITY_ORDER.
+    """
+    category_name = CATEGORIES[category_id]
+
+    rendered = '\n'.join(format_question_for_review(item) for item in questions)
+    body = template.replace('{questions}', rendered)
+
+    rank = PRIORITY_ORDER.index(category_id) + 1
+    # The two trailing empty entries give the header its closing blank line.
+    header_rows = [
+        f"# Audit Batch: Category {category_id:02d} - {category_name}",
+        "",
+        f"**Questions**: {len(questions)}",
+        f"**Priority**: {rank}/{len(PRIORITY_ORDER)}",
+        "",
+        "---",
+        "",
+        "",
+    ]
+    return '\n'.join(header_rows) + body
+
+
+def main():
+    """
+    Main entry point.
+
+    Loads audit-context.json and the batch template, groups questions by
+    category_id, and writes one Markdown batch per category in PRIORITY_ORDER.
+    Category 9 is written as two parts when it has more than 20 questions.
+
+    Returns:
+        1 if the audit context or the template file is missing, otherwise None.
+    """
+    # sys is otherwise only imported inside the __main__ guard; importing it
+    # here keeps the error paths working when main() is called after an import.
+    import sys
+
+    print("═══════════════════════════════════════════════════════════════")
+    print("Quiz Question Audit Batch Generation")
+    print("═══════════════════════════════════════════════════════════════")
+    print()
+
+    # Load audit context
+    if not AUDIT_CONTEXT.exists():
+        print("Error: audit-context.json not found. Run extract-audit-context.py first.", file=sys.stderr)
+        return 1
+
+    data = json.loads(AUDIT_CONTEXT.read_text(encoding="utf-8"))
+    questions = data['questions']
+
+    print(f"Loaded {len(questions)} questions")
+
+    # Load template
+    if not BATCH_TEMPLATE.exists():
+        print(f"Error: batch template not found: {BATCH_TEMPLATE}", file=sys.stderr)
+        return 1
+
+    template = BATCH_TEMPLATE.read_text(encoding="utf-8")
+
+    # Group by category
+    by_category = {}
+    for q in questions:
+        by_category.setdefault(q['category_id'], []).append(q)
+
+    print(f"Categories: {len(by_category)}")
+    print()
+
+    # Only categories listed in PRIORITY_ORDER get a batch; say so instead of
+    # dropping the rest without a trace.
+    for cat_id, cat_questions in by_category.items():
+        if cat_id not in PRIORITY_ORDER:
+            print(
+                f"Warning: category {cat_id!r} is not in PRIORITY_ORDER; "
+                f"{len(cat_questions)} questions skipped",
+                file=sys.stderr,
+            )
+
+    # Create output directory
+    OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
+
+    # Generate batches
+    for cat_id in PRIORITY_ORDER:
+        if cat_id not in by_category:
+            continue
+
+        cat_questions = by_category[cat_id]
+        slug = f"{cat_id:02d}-{CATEGORIES[cat_id].lower().replace(' ', '-')}"
+
+        # Handle advanced-patterns (split into 2 batches if >20 questions)
+        if cat_id == 9 and len(cat_questions) > 20:
+            mid = len(cat_questions) // 2
+            title = f"# Audit Batch: Category {cat_id:02d}"
+
+            for part_no, part in enumerate((cat_questions[:mid], cat_questions[mid:]), 1):
+                # Add the part indicator to the header only (first occurrence),
+                # never to matching text inside the questions.
+                part_content = generate_batch(cat_id, part, template).replace(
+                    title, f"{title} - Part {part_no}/2", 1
+                )
+                output_file = OUTPUT_DIR / f"{slug}-part{part_no}.md"
+                # Batches contain non-ASCII markers, so pin the encoding.
+                output_file.write_text(part_content, encoding="utf-8")
+                print(f"✓ Generated {output_file.name} ({len(part)} questions)")
+        else:
+            batch_content = generate_batch(cat_id, cat_questions, template)
+            output_file = OUTPUT_DIR / f"{slug}.md"
+            output_file.write_text(batch_content, encoding="utf-8")
+            print(f"✓ Generated {output_file.name} ({len(cat_questions)} questions)")
+
+    print()
+    print("═══════════════════════════════════════════════════════════════")
+    print(f"✓ Batches generated in: {OUTPUT_DIR}")
+    print(f"  Total files: {len(list(OUTPUT_DIR.glob('*.md')))}")
+    print()
+    print("Review order (priority):")
+    for idx, cat_id in enumerate(PRIORITY_ORDER, 1):
+        if cat_id in by_category:
+            count = len(by_category[cat_id])
+            print(f"  {idx:2d}. Category {cat_id:02d} - {CATEGORIES[cat_id]} ({count} questions)")
+    print("═══════════════════════════════════════════════════════════════")
+
+
+if __name__ == '__main__':
+    import sys
+    sys.exit(main() or 0)
--- a/scripts/generate-audit-report.py
+++ b/scripts/generate-audit-report.py
@ -0,0 +1,250 @@
+#!/usr/bin/env python3
+"""
+Generate audit report from agent reviews.
+
+Collects agent review outputs and compiles them into a comprehensive report.
+
+Input: claudedocs/audit-reviews/*.txt (agent outputs)
+Output: claudedocs/audit-report.md
+
+Usage:
+    python3 scripts/generate-audit-report.py
+"""
+
+import re
+from collections import defaultdict
+from pathlib import Path
+from typing import Dict, List, Tuple
+
+
+# Directory two levels above this script file (the parent of the script's directory).
+BASE_DIR = Path(__file__).parent.parent
+# Input: agent review outputs, read as *.txt files.
+REVIEWS_DIR = BASE_DIR / "claudedocs" / "audit-reviews"
+# Output: the compiled Markdown report.
+OUTPUT_REPORT = BASE_DIR / "claudedocs" / "audit-report.md"
+
+
+def parse_review_file(filepath: Path) -> Dict:
+    """
+    Parse agent review output.
+
+    Expected format:
+        PASS: Q01-001
+        ISSUE: Q01-002 - [critical] CORRECT_ANSWER - Description
+        ISSUE: Q01-003 - [warning] AMBIGUITY - Description
+
+    Lines starting with neither prefix are ignored. Severity is lowercased so
+    '[Critical]' and '[critical]' are counted together. ISSUE lines that do
+    not match the expected shape are reported on stderr rather than dropped
+    without notice.
+
+    Args:
+        filepath: Path of a review text file.
+
+    Returns:
+        {'pass': [q_id, ...],
+         'issues': [{'q_id', 'severity', 'type', 'description'}, ...]}
+    """
+    # Function-scope import: this module only imports sys in its __main__ guard.
+    import sys
+
+    # Parse: ISSUE: Q01-002 - [critical] CORRECT_ANSWER - Description
+    issue_pattern = re.compile(r'ISSUE:\s+(Q\d+-\d+)\s+-\s+\[(\w+)\]\s+(\w+)\s+-\s+(.+)')
+
+    content = filepath.read_text(encoding="utf-8")
+    results = {
+        'pass': [],
+        'issues': []
+    }
+
+    for line in content.split('\n'):
+        line = line.strip()
+        if not line:
+            continue
+
+        if line.startswith('PASS:'):
+            q_id = line.replace('PASS:', '').strip()
+            results['pass'].append(q_id)
+
+        elif line.startswith('ISSUE:'):
+            match = issue_pattern.match(line)
+            if match is None:
+                print(f"Warning: unparsable ISSUE line in {filepath.name}: {line}", file=sys.stderr)
+                continue
+
+            q_id, severity, issue_type, description = match.groups()
+            results['issues'].append({
+                'q_id': q_id,
+                'severity': severity.lower(),
+                'type': issue_type,
+                'description': description
+            })
+
+    return results
+
+
+def generate_report(all_reviews: List[Dict]) -> str:
+    """
+    Generate comprehensive audit report.
+
+    Args:
+        all_reviews: Parsed review results, each a dict with a 'pass' list of
+            question ids and an 'issues' list of dicts carrying 'q_id',
+            'severity', 'type' and 'description'.
+
+    Returns:
+        The report as Markdown text: executive summary, issues grouped by
+        severity (critical / warning / info), pass rate per category (the
+        part of the question id before the first '-') and recommended
+        actions. Percentages are reported as 0.0% when nothing was reviewed.
+    """
+    # Function-scope import so this block adds no module-level dependency.
+    from datetime import date
+
+    def pct(part: int, whole: int) -> float:
+        """Return part/whole as a percentage, or 0 when whole is 0."""
+        return part / whole * 100 if whole else 0
+
+    # Flatten once, keeping file order, then bucket by severity.
+    all_issues = [issue for review in all_reviews for issue in review['issues']]
+    critical_issues = [i for i in all_issues if i['severity'] == 'critical']
+    warning_issues = [i for i in all_issues if i['severity'] == 'warning']
+    info_issues = [i for i in all_issues if i['severity'] == 'info']
+
+    lines = []
+
+    # Header
+    lines.append("# Quiz Question Audit Report")
+    lines.append("")
+    # Stamp the actual generation date rather than a fixed literal.
+    lines.append(f"**Generated**: {date.today().isoformat()}")
+    lines.append("")
+    lines.append("---")
+    lines.append("")
+
+    # Aggregate statistics
+    total_pass = sum(len(r['pass']) for r in all_reviews)
+    total_issues = len(all_issues)
+    total_questions = total_pass + total_issues
+
+    lines.append("## Executive Summary")
+    lines.append("")
+    lines.append(f"**Total Questions Reviewed**: {total_questions}")
+    lines.append(f"**Pass**: {total_pass} ({pct(total_pass, total_questions):.1f}%)")
+    lines.append(f"**Issues Found**: {total_issues} ({pct(total_issues, total_questions):.1f}%)")
+    lines.append("")
+    lines.append("### Issue Breakdown")
+    lines.append("")
+    lines.append(f"- **Critical**: {len(critical_issues)} (wrong answer, major factual error)")
+    lines.append(f"- **Warning**: {len(warning_issues)} (ambiguous, outdated, misleading)")
+    lines.append(f"- **Info**: {len(info_issues)} (minor wording, trivial)")
+    lines.append("")
+    lines.append("---")
+    lines.append("")
+
+    # Critical issues
+    lines.append("## Critical Issues (Immediate Fix Required)")
+    lines.append("")
+
+    if critical_issues:
+        for issue in sorted(critical_issues, key=lambda x: x['q_id']):
+            lines.append(f"### {issue['q_id']}")
+            lines.append("")
+            lines.append(f"**Type**: {issue['type']}")
+            lines.append(f"**Issue**: {issue['description']}")
+            lines.append("")
+    else:
+        lines.append("*No critical issues found.*")
+        lines.append("")
+
+    lines.append("---")
+    lines.append("")
+
+    # Warnings
+    lines.append("## Warnings (Review & Consider Fixing)")
+    lines.append("")
+
+    if warning_issues:
+        # Group by type
+        by_type = defaultdict(list)
+        for issue in warning_issues:
+            by_type[issue['type']].append(issue)
+
+        for issue_type, issues in sorted(by_type.items()):
+            lines.append(f"### {issue_type} ({len(issues)} questions)")
+            lines.append("")
+            for issue in sorted(issues, key=lambda x: x['q_id']):
+                lines.append(f"- **{issue['q_id']}**: {issue['description']}")
+            lines.append("")
+    else:
+        lines.append("*No warnings found.*")
+        lines.append("")
+
+    lines.append("---")
+    lines.append("")
+
+    # Info
+    lines.append("## Info (Minor Issues)")
+    lines.append("")
+
+    if info_issues:
+        for issue in sorted(info_issues, key=lambda x: x['q_id']):
+            lines.append(f"- **{issue['q_id']}** ({issue['type']}): {issue['description']}")
+        lines.append("")
+    else:
+        lines.append("*No info issues found.*")
+        lines.append("")
+
+    lines.append("---")
+    lines.append("")
+
+    # Health by category
+    lines.append("## Health by Category")
+    lines.append("")
+    lines.append("| Category | Pass | Issues | Pass Rate |")
+    lines.append("|----------|------|--------|-----------|")
+
+    # Extract category from Q01-001 format
+    by_category = defaultdict(lambda: {'pass': 0, 'issues': 0})
+    for review in all_reviews:
+        for q_id in review['pass']:
+            by_category[q_id.split('-')[0]]['pass'] += 1
+    for issue in all_issues:
+        by_category[issue['q_id'].split('-')[0]]['issues'] += 1
+
+    for cat in sorted(by_category):
+        stats = by_category[cat]
+        pass_rate = pct(stats['pass'], stats['pass'] + stats['issues'])
+        lines.append(f"| Category {cat} | {stats['pass']} | {stats['issues']} | {pass_rate:.1f}% |")
+
+    lines.append("")
+    lines.append("---")
+    lines.append("")
+
+    # Next steps
+    lines.append("## Recommended Actions")
+    lines.append("")
+    lines.append("1. **Fix Critical Issues** (Priority 1)")
+    lines.append("   - Review each critical issue")
+    lines.append("   - Fix question/answer or update explanation")
+    lines.append("   - Rebuild: `python3 scripts/build-questions.py`")
+    lines.append("")
+    lines.append("2. **Review Warnings** (Priority 2)")
+    lines.append("   - Evaluate ambiguities and outdated info")
+    lines.append("   - Decide: fix, clarify, or accept")
+    lines.append("")
+    lines.append("3. **Consider Info Issues** (Priority 3)")
+    lines.append("   - Minor improvements for quality")
+    lines.append("")
+
+    return '\n'.join(lines)
+
+
+def main():
+    """
+    Main entry point.
+
+    Parses every *.txt file in REVIEWS_DIR, compiles the results with
+    generate_report() and writes the report to OUTPUT_REPORT.
+
+    Returns:
+        1 if the reviews directory is missing or holds no .txt files,
+        otherwise 0.
+    """
+    # sys is otherwise only imported inside the __main__ guard; importing it
+    # here keeps the error paths working when main() is called after an import.
+    import sys
+
+    print("═══════════════════════════════════════════════════════════════")
+    print("Quiz Question Audit Report Generation")
+    print("═══════════════════════════════════════════════════════════════")
+    print()
+
+    if not REVIEWS_DIR.exists():
+        print(f"Error: Reviews directory not found: {REVIEWS_DIR}", file=sys.stderr)
+        print("Place agent review outputs in claudedocs/audit-reviews/*.txt", file=sys.stderr)
+        return 1
+
+    # Find review files
+    review_files = sorted(REVIEWS_DIR.glob('*.txt'))
+    if not review_files:
+        print(f"Error: No review files found in {REVIEWS_DIR}", file=sys.stderr)
+        return 1
+
+    print(f"Found {len(review_files)} review files")
+    print()
+
+    # Parse all reviews
+    all_reviews = []
+    for filepath in review_files:
+        print(f"Parsing {filepath.name}...")
+        review = parse_review_file(filepath)
+        all_reviews.append(review)
+        print(f"  Pass: {len(review['pass'])}, Issues: {len(review['issues'])}")
+
+    print()
+
+    # Generate report
+    report = generate_report(all_reviews)
+
+    # Write output. Pin the encoding: review descriptions may carry non-ASCII
+    # text that the locale default cannot always encode.
+    OUTPUT_REPORT.write_text(report, encoding="utf-8")
+
+    print("═══════════════════════════════════════════════════════════════")
+    print(f"✓ Report generated: {OUTPUT_REPORT}")
+    print("═══════════════════════════════════════════════════════════════")
+
+    return 0
+
+
+if __name__ == '__main__':
+    import sys
+    sys.exit(main())