claude-code-ultimate-guide/scripts/resync-reference-yaml.py
Florian BRUNIAUX 8e63d84b47 docs: factual audit + reference sync — 260 findings corrected
Parallel 6-agent audit against official Anthropic docs (llms-full.txt).
Key corrections applied across permissions, hooks, MCP, security, privacy, reference.yaml.

Highlights:
- Fix MCP config path (~/.claude.json), mcpServers key, variable substitution syntax
- Fix permission modes (5 not 3), :* syntax (×6), Stop event description
- Fix hook JSON field names (hook_event_name, tool_name, tool_input, session_id)
- Fix filesystem restriction docs (permission rules, not settings.json keys)
- Fix data-privacy: 4-tier retention, /bug 5yr warning, ZDR conditions, 5 telemetry opt-out vars
- Add official llms.txt/llms-full.txt references to CLAUDE.md + machine-readable/llms.txt
- Reference.yaml: 375 entries re-synced (92% had wrong line numbers — guide grew 15K→21K lines)
- New script: scripts/resync-reference-yaml.py for automated line number sync
- Quiz: corrected answers for hooks (07), memory settings (03), MCP servers (08)

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-02-26 12:10:14 +01:00

406 lines
15 KiB
Python

#!/usr/bin/env python3
"""
Re-sync line numbers in machine-readable/reference.yaml
Strategy:
1. Build header index for each guide file (all lines starting with #)
2. For each reference.yaml entry with a line number, read what's at that line
3. If content doesn't look right, search by key-name keywords in headers
4. Output a Markdown report with proposed fixes + confidence scores
Usage:
python3 scripts/resync-reference-yaml.py [--apply]
Without --apply: prints a summary to stdout + saves the full report to claudedocs/resync-report.md
With --apply: same, then applies HIGH and MEDIUM confidence fixes directly to reference.yaml
"""
import re
import sys
import os
from pathlib import Path
# Repository root, taken as two directory levels above this script file.
REPO_ROOT = Path(__file__).parent.parent
YAML_FILE = REPO_ROOT.joinpath("machine-readable", "reference.yaml")
REPORT_FILE = REPO_ROOT.joinpath("claudedocs", "resync-report.md")

# Files that reference.yaml points to (bare integers → ultimate-guide.md).
# Only the keys are used; every value is None.
GUIDE_FILES = dict.fromkeys(
    (
        "guide/ultimate-guide.md",
        "guide/architecture.md",
        "guide/workflows/iterative-refinement.md",
        "guide/observability.md",
        "guide/learning-with-ai.md",
        "guide/ai-ecosystem.md",
        "guide/ai-traceability.md",
        "guide/sandbox-isolation.md",
        "guide/sandbox-native.md",
        "guide/known-issues.md",
        "guide/third-party-tools.md",
        "guide/adoption-approaches.md",
        "examples/commands/review-pr.md",
        "examples/agents/code-reviewer.md",
    )
)
def build_header_index(filepath: Path) -> list[tuple[int, str]]:
    """Return (line_num, header_text) for every Markdown ATX header in a file.

    A header is a line that starts at column 0 with one to six ``#``
    characters followed by whitespace or end of line. Lines inside fenced
    code blocks (``` or ~~~) are skipped, so shell/Python comments in code
    samples are not mistaken for headers.

    Args:
        filepath: Path of the Markdown file to scan.

    Returns:
        1-based line numbers paired with the right-stripped header line, in
        file order. An empty list if the file does not exist.
    """
    fence_re = re.compile(r"^\s*(`{3,}|~{3,})(.*)$")
    header_re = re.compile(r"^#{1,6}(?:\s|$)")

    headers = []
    open_fence = None  # marker (e.g. "```") of the code block we are inside
    try:
        with open(filepath, encoding="utf-8") as f:
            for line_num, line in enumerate(f, 1):
                text = line.rstrip()
                fence = fence_re.match(text)
                if fence:
                    marker, rest = fence.group(1), fence.group(2)
                    if open_fence is None:
                        # A backtick fence cannot carry backticks in its info
                        # string; such a line is inline code, not a fence.
                        if not (marker[0] == "`" and "`" in rest):
                            open_fence = marker
                            continue
                    else:
                        # Closing fence: same character, at least as long,
                        # and nothing but whitespace after it.
                        if (marker[0] == open_fence[0]
                                and len(marker) >= len(open_fence)
                                and not rest.strip()):
                            open_fence = None
                        continue
                if open_fence is None and header_re.match(text):
                    headers.append((line_num, text))
    except FileNotFoundError:
        # Best effort: the caller reports files that yield no headers.
        pass
    return headers
def get_line_content(filepath: Path, line_num: int, context: int = 2) -> str:
    """Return the lines surrounding ``line_num`` as one annotated string.

    Each output line has the form ``<marker> <number>: <text>``, where the
    marker is ``>>>`` on the requested line and a single space elsewhere.
    Up to ``context`` lines before and after are included, clipped to the
    file bounds.

    Args:
        filepath: File to read.
        line_num: 1-based line number to highlight.
        context: Number of neighbouring lines to show on each side.

    Returns:
        The newline-joined excerpt, or ``"(file not found)"`` when the file
        does not exist.
    """
    try:
        with open(filepath, encoding="utf-8") as handle:
            all_lines = handle.readlines()
    except FileNotFoundError:
        return "(file not found)"

    target = line_num - 1  # 0-based index of the highlighted line
    first = max(0, target - context)
    last = min(len(all_lines), line_num + context)
    return "\n".join(
        f"{'>>>' if idx == target else ' '} {idx + 1}: {all_lines[idx].rstrip()}"
        for idx in range(first, last)
    )
def key_to_keywords(key: str) -> list[str]:
    """Split a snake_case or kebab-case key into lowercase search keywords.

    Tokens of two characters or fewer and generic filler words are dropped.
    The order of the remaining tokens is preserved.
    """
    stopwords = frozenset((
        "the", "a", "an", "and", "or", "of", "in", "to", "for",
        "is", "at", "by", "on", "it", "its", "from", "with",
        "guide", "section", "line", "ref", "reference", "api",
        "mode", "modes", "type", "types", "table", "example",
        "examples", "list", "advanced", "basic", "overview", "vs",
    ))
    keywords = []
    for token in re.split(r"[_\-]", key.lower()):
        if len(token) <= 2 or token in stopwords:
            continue
        keywords.append(token)
    return keywords
def score_header(keywords: list[str], header_text: str) -> int:
    """Score a header by how many keywords it contains (higher = better).

    Matching is a case-insensitive substring test, so ``"hook"`` matches a
    header containing ``"Hooks"``. Each matching keyword adds one point.

    Args:
        keywords: Search keywords, expected to be lowercase already.
        header_text: Raw header line to score.

    Returns:
        The number of keywords found in ``header_text``.
    """
    header_lower = header_text.lower()
    # The former "partial match" bonus (keyword inside a single word) could
    # never fire: any such keyword is already a substring of the whole header
    # and is counted here.
    return sum(1 for kw in keywords if kw in header_lower)
def find_best_header(key: str, headers: list[tuple[int, str]],
                     old_line: int) -> tuple[int | None, float, str]:
    """Pick the header that best matches ``key``.

    Args:
        key: reference.yaml key; converted to keywords via ``key_to_keywords``.
        headers: ``(line_num, header_text)`` pairs to search.
        old_line: Previously recorded line number (currently not used).

    Returns:
        ``(new_line, confidence, header_text)``. When the key yields no
        keywords or no header scores above zero, returns ``(None, 0.0, "")``.
        On equal scores the earliest header wins.
    """
    keywords = key_to_keywords(key)
    if not keywords:
        return None, 0.0, ""

    scored = [(score_header(keywords, text), num, text) for num, text in headers]
    if not scored:
        return None, 0.0, ""

    # max() returns the first maximal item, i.e. the earliest best header.
    top_score, top_line, top_text = max(scored, key=lambda item: item[0])
    if top_score == 0:
        return None, 0.0, ""

    ranked = sorted((item[0] for item in scored), reverse=True)
    runner_up = ranked[1] if len(ranked) > 1 else 0

    # Share of the key's keywords that the winning header covers.
    coverage = top_score / max(len(keywords), 1)
    # Confidence: high only if the best is clearly better than the second best.
    clear_winner = runner_up == 0 or top_score / max(runner_up, 0.1) >= 2
    if top_score >= 2 and clear_winner:
        confidence = min(1.0, coverage)
    else:
        confidence = 0.4 * coverage
    return top_line, confidence, top_text
def parse_yaml_line_refs(yaml_content: str) -> list[dict]:
    """Extract all line-number references from reference.yaml text.

    Two line shapes are recognised (blank lines and ``#`` comment lines are
    skipped):

    1. ``key: "filepath:NNNN"`` (optionally followed by a comment) gives a
       ``string_ref`` entry pointing at ``filepath``.
    2. ``key: NNNN`` with a bare 3-5 digit integer greater than 100 gives a
       ``bare_int`` entry, always attributed to ``guide/ultimate-guide.md``.
       Keys containing a word that marks a quantity (count, score, year, ...)
       are ignored.

    Args:
        yaml_content: Full text of reference.yaml.

    Returns:
        One dict per reference with the keys ``key``, ``file``, ``old_line``,
        ``yaml_line`` (1-based line in the YAML text), ``raw_value`` and
        ``type`` (``"string_ref"`` or ``"bare_int"``).
    """
    string_ref_re = re.compile(r'^(\s*)(\S+):\s*"([^"]+):(\d+)"')
    bare_int_re = re.compile(r'^(\s*)(\S+):\s*(\d{3,5})\s*(?:#.*)?$')
    # Key fragments that mark a plain number (not a line reference).
    non_reference_markers = (
        "count", "score", "stars", "year", "limit", "budget",
        "savings", "total", "ratio", "sizing",
    )

    results = []
    for yaml_line, line in enumerate(yaml_content.split("\n"), 1):
        stripped = line.strip()
        if not stripped or stripped.startswith("#"):
            continue

        # Pattern 1: key: "filepath:NNNN" or key: "filepath:NNNN" # comment
        m = string_ref_re.match(line)
        if m:
            filepath = m.group(3)
            line_num = int(m.group(4))
            results.append({
                "key": m.group(2).rstrip(":"),
                "file": filepath,
                "old_line": line_num,
                "yaml_line": yaml_line,
                "raw_value": f'"{filepath}:{line_num}"',
                "type": "string_ref",
            })
            continue

        # Pattern 2: key: NNNN (bare integer, implies ultimate-guide.md)
        m = bare_int_re.match(line)
        if not m:
            continue
        key = m.group(2).rstrip(":")
        line_num = int(m.group(3))
        # Heuristic: only values above 100 whose key does not look like a
        # count, score, year etc. are treated as content references.
        if line_num > 100 and not any(x in key for x in non_reference_markers):
            results.append({
                "key": key,
                "file": "guide/ultimate-guide.md",
                "old_line": line_num,
                "yaml_line": yaml_line,
                "raw_value": str(line_num),
                "type": "bare_int",
            })
    return results
def validate_line(filepath: Path, line_num: int) -> str:
    """Return the right-stripped text at a 1-based line number.

    Instead of raising, problems are reported as a parenthesised message:
    ``"(file not found)"`` when the file does not exist, or an
    ``"(line N out of range, ...)"`` message when ``line_num`` is outside
    the file.
    """
    try:
        with open(filepath, encoding="utf-8") as handle:
            all_lines = handle.readlines()
    except FileNotFoundError:
        return "(file not found)"

    total = len(all_lines)
    if line_num < 1 or line_num > total:
        return f"(line {line_num} out of range, file has {total} lines)"
    return all_lines[line_num - 1].rstrip()
def is_sensible_content(key: str, content: str) -> bool:
    """Quick plausibility check: does ``content`` relate to ``key``?

    Empty content, or content starting with ``(`` (the shape of the error
    messages produced by ``validate_line``), is never sensible. A key with
    no usable keywords is always accepted. Otherwise at least half of the
    keywords (rounded down, minimum one) must occur in the content,
    case-insensitively.
    """
    if not content or content.startswith("("):
        return False

    keywords = key_to_keywords(key)
    if not keywords:
        return True

    haystack = content.lower()
    hits = 0
    for kw in keywords:
        if kw in haystack:
            hits += 1
    required = max(1, len(keywords) // 2)
    return hits >= required
def main():
    """Check every line reference in reference.yaml and report or apply fixes.

    Steps:
    1. Read reference.yaml and index the headers of each file in GUIDE_FILES
       (other referenced files are indexed on first use).
    2. For each reference, accept it if the content at the recorded line
       looks related to the key; otherwise search the headers for a better
       line and grade the match (HIGH >= 0.7, MEDIUM >= 0.4, LOW, UNKNOWN).
    3. Write a Markdown report to REPORT_FILE and print a summary.
    4. With ``--apply`` on the command line, rewrite reference.yaml with the
       HIGH and MEDIUM confidence corrections.
    """
    # Local import: only needed to stamp the report.
    from datetime import date

    apply_fixes = "--apply" in sys.argv

    print("Reading reference.yaml...")
    with open(YAML_FILE, encoding="utf-8") as f:
        yaml_content = f.read()

    print("Building header indexes...")
    header_indexes = {}
    for rel_path in GUIDE_FILES:
        abs_path = REPO_ROOT / rel_path
        idx = build_header_index(abs_path)
        header_indexes[rel_path] = idx
        if idx:
            print(f" {rel_path}: {len(idx)} headers")
        else:
            print(f" {rel_path}: NOT FOUND or no headers")

    print("\nParsing YAML references...")
    refs = parse_yaml_line_refs(yaml_content)
    print(f"Found {len(refs)} line number references")

    report_lines = [
        "# Re-sync Report: machine-readable/reference.yaml",
        # Stamp the actual run date rather than a fixed one.
        f"Generated: {date.today().isoformat()}",
        f"Total references scanned: {len(refs)}",
        "",
    ]

    corrections = []  # (yaml_line, old_value, new_value, key)
    stats = {"ok": 0, "high": 0, "medium": 0, "low": 0, "unknown": 0, "file_missing": 0}
    ok_entries = []
    needs_fix = []

    for ref in refs:
        key = ref["key"]
        rel_path = ref["file"]
        old_line = ref["old_line"]
        abs_path = REPO_ROOT / rel_path

        current_content = validate_line(abs_path, old_line)

        headers = header_indexes.get(rel_path)
        if headers is None:
            # The YAML references a file that is not listed in GUIDE_FILES:
            # index it on demand so its entries can still be matched.
            headers = build_header_index(abs_path)
            header_indexes[rel_path] = headers

        if not headers and not abs_path.exists():
            stats["file_missing"] += 1
            needs_fix.append({**ref, "status": "FILE_MISSING", "new_line": None,
                              "confidence": 0, "current_content": current_content,
                              "suggested_header": ""})
            continue

        if is_sensible_content(key, current_content):
            stats["ok"] += 1
            ok_entries.append({**ref, "current_content": current_content})
            continue

        new_line, confidence, header_text = find_best_header(key, headers, old_line)
        if confidence >= 0.7:
            level = "HIGH"
            stats["high"] += 1
        elif confidence >= 0.4:
            level = "MEDIUM"
            stats["medium"] += 1
        elif new_line:
            level = "LOW"
            stats["low"] += 1
        else:
            level = "UNKNOWN"
            stats["unknown"] += 1

        needs_fix.append({
            **ref,
            "status": level,
            "new_line": new_line,
            "confidence": confidence,
            "current_content": current_content,
            "suggested_header": header_text,
        })

        # Only HIGH and MEDIUM matches become auto-applicable corrections.
        if new_line and level in ("HIGH", "MEDIUM"):
            if ref["type"] == "string_ref":
                old_val = f'"{rel_path}:{old_line}"'
                new_val = f'"{rel_path}:{new_line}"'
            else:
                old_val = str(old_line)
                new_val = str(new_line)
            corrections.append((ref["yaml_line"], old_val, new_val, key))

    # Build report
    report_lines += [
        "## Summary",
        "",
        "| Status | Count |",
        "|--------|-------|",
        f"| OK (content matches) | {stats['ok']} |",
        f"| HIGH confidence fix | {stats['high']} |",
        f"| MEDIUM confidence fix | {stats['medium']} |",
        f"| LOW confidence (manual review) | {stats['low']} |",
        f"| UNKNOWN (no match found) | {stats['unknown']} |",
        f"| FILE MISSING | {stats['file_missing']} |",
        f"| **Total** | **{len(refs)}** |",
        "",
        f"Auto-fixable (HIGH + MEDIUM): **{stats['high'] + stats['medium']}**",
        "",
    ]

    if needs_fix:
        report_lines += ["## Entries Needing Correction", ""]
        # Most confident suggestions first, then alphabetical by key.
        for entry in sorted(needs_fix, key=lambda x: (-x["confidence"], x["key"])):
            status = entry["status"]
            emoji = {"HIGH": "", "MEDIUM": "⚠️", "LOW": "🔶", "UNKNOWN": "", "FILE_MISSING": "🚫"}.get(status, "")
            report_lines.append(f"### {emoji} {entry['key']} ({status}, conf={entry['confidence']:.2f})")
            report_lines.append(f"- **File**: `{entry['file']}`")
            report_lines.append(f"- **Old line**: {entry['old_line']}")
            report_lines.append(f"- **Content at old line**: `{entry['current_content'][:100]}`")
            if entry["new_line"]:
                report_lines.append(f"- **Suggested line**: {entry['new_line']}")
                report_lines.append(f"- **Header found**: `{entry['suggested_header']}`")
            else:
                report_lines.append("- **Suggested line**: (not found — manual search needed)")
            report_lines.append("")

    report_lines += ["## OK Entries (sample)", ""]
    for entry in ok_entries[:20]:
        report_lines.append(f"- `{entry['key']}`: line {entry['old_line']} → `{entry['current_content'][:80]}`")

    report_content = "\n".join(report_lines)

    # Write report
    REPORT_FILE.parent.mkdir(parents=True, exist_ok=True)
    with open(REPORT_FILE, "w", encoding="utf-8") as f:
        f.write(report_content)
    print(f"\nReport saved to {REPORT_FILE}")

    # Print summary
    print(f"\n{'='*60}")
    print(f"OK: {stats['ok']}")
    print(f"HIGH fix: {stats['high']}")
    print(f"MEDIUM fix: {stats['medium']}")
    print(f"LOW fix: {stats['low']}")
    print(f"UNKNOWN: {stats['unknown']}")
    print(f"FILE MISSING: {stats['file_missing']}")
    print(f"Auto-fixable: {stats['high'] + stats['medium']}")
    print(f"{'='*60}")

    if corrections:
        print(f"\n{'='*60}")
        print("PROPOSED CORRECTIONS (HIGH + MEDIUM confidence):")
        print(f"{'='*60}")
        for yaml_line, old_val, new_val, key in corrections[:30]:
            print(f" Line {yaml_line:4d} | {key}")
            # Separate old and new so the change is readable.
            print(f" {old_val} → {new_val}")
        if len(corrections) > 30:
            print(f" ... and {len(corrections) - 30} more (see report)")

    if apply_fixes and corrections:
        print(f"\nApplying {len(corrections)} fixes to reference.yaml...")
        # Replacements never add or remove lines, so split once and edit in place.
        lines = yaml_content.split("\n")
        applied = 0
        for yaml_line_num, old_val, new_val, _key in corrections:
            target_idx = yaml_line_num - 1
            # Try the recorded line first, then fall back to ±1 and ±2 lines.
            for delta in (0, -1, 1, -2, 2):
                idx = target_idx + delta
                if 0 <= idx < len(lines) and old_val in lines[idx]:
                    lines[idx] = lines[idx].replace(old_val, new_val, 1)
                    applied += 1
                    break

        with open(YAML_FILE, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))
        print(f"Applied {applied}/{len(corrections)} fixes.")
        print("Run without --apply to verify remaining issues.")
    elif not apply_fixes and corrections:
        print("\nRun with --apply to apply HIGH+MEDIUM confidence fixes automatically.")
        print("LOW and UNKNOWN confidence fixes require manual review (see report).")


if __name__ == "__main__":
    main()