docs: factual audit + reference sync — 260 findings corrected
Parallel 6-agent audit against official Anthropic docs (llms-full.txt). Key corrections applied across permissions, hooks, MCP, security, privacy, reference.yaml. Highlights: - Fix MCP config path (~/.claude.json), mcpServers key, variable substitution syntax - Fix permission modes (5 not 3), :* syntax (×6), Stop event description - Fix hook JSON field names (hook_event_name, tool_name, tool_input, session_id) - Fix filesystem restriction docs (permission rules, not settings.json keys) - Fix data-privacy: 4-tier retention, /bug 5yr warning, ZDR conditions, 5 telemetry opt-out vars - Add official llms.txt/llms-full.txt references to CLAUDE.md + machine-readable/llms.txt - Reference.yaml: 375 entries re-synced (92% had wrong line numbers — guide grew 15K→21K lines) - New script: scripts/resync-reference-yaml.py for automated line number sync - Quiz: corrected answers for hooks (07), memory settings (03), MCP servers (08) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
ad735dfff4
commit
8e63d84b47
26 changed files with 1125 additions and 566 deletions
406
scripts/resync-reference-yaml.py
Normal file
406
scripts/resync-reference-yaml.py
Normal file
|
|
@ -0,0 +1,406 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Re-sync line numbers in machine-readable/reference.yaml
|
||||
|
||||
Strategy:
|
||||
1. Build header index for each guide file (all lines starting with #)
|
||||
2. For each reference.yaml entry with a line number, read what's at that line
|
||||
3. If content doesn't look right, search by key-name keywords in headers
|
||||
4. Output a patch file with proposed fixes + confidence scores
|
||||
|
||||
Usage:
|
||||
python3 scripts/resync-reference-yaml.py [--apply]
|
||||
|
||||
Without --apply: prints report to stdout + saves claudedocs/resync-report.md
|
||||
With --apply: applies HIGH CONFIDENCE fixes directly to reference.yaml
|
||||
"""
|
||||
|
||||
import re
|
||||
import sys
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
REPO_ROOT = Path(__file__).parent.parent
|
||||
YAML_FILE = REPO_ROOT / "machine-readable" / "reference.yaml"
|
||||
REPORT_FILE = REPO_ROOT / "claudedocs" / "resync-report.md"
|
||||
|
||||
# Files that reference.yaml points to (with bare integers → ultimate-guide.md)
|
||||
GUIDE_FILES = {
|
||||
"guide/ultimate-guide.md": None,
|
||||
"guide/architecture.md": None,
|
||||
"guide/workflows/iterative-refinement.md": None,
|
||||
"guide/observability.md": None,
|
||||
"guide/learning-with-ai.md": None,
|
||||
"guide/ai-ecosystem.md": None,
|
||||
"guide/ai-traceability.md": None,
|
||||
"guide/sandbox-isolation.md": None,
|
||||
"guide/sandbox-native.md": None,
|
||||
"guide/known-issues.md": None,
|
||||
"guide/third-party-tools.md": None,
|
||||
"guide/adoption-approaches.md": None,
|
||||
"examples/commands/review-pr.md": None,
|
||||
"examples/agents/code-reviewer.md": None,
|
||||
}
|
||||
|
||||
|
||||
def build_header_index(filepath: Path) -> list[tuple[int, str]]:
|
||||
"""Return list of (line_num, header_text) for all # headers."""
|
||||
headers = []
|
||||
try:
|
||||
with open(filepath, encoding="utf-8") as f:
|
||||
for i, line in enumerate(f, 1):
|
||||
stripped = line.rstrip()
|
||||
if stripped.startswith("#"):
|
||||
headers.append((i, stripped))
|
||||
except FileNotFoundError:
|
||||
pass
|
||||
return headers
|
||||
|
||||
|
||||
def get_line_content(filepath: Path, line_num: int, context: int = 2) -> str:
|
||||
"""Return content around a given line number."""
|
||||
try:
|
||||
with open(filepath, encoding="utf-8") as f:
|
||||
lines = f.readlines()
|
||||
start = max(0, line_num - 1 - context)
|
||||
end = min(len(lines), line_num + context)
|
||||
result = []
|
||||
for i in range(start, end):
|
||||
marker = ">>>" if i == line_num - 1 else " "
|
||||
result.append(f"{marker} {i+1}: {lines[i].rstrip()}")
|
||||
return "\n".join(result)
|
||||
except FileNotFoundError:
|
||||
return "(file not found)"
|
||||
|
||||
|
||||
def key_to_keywords(key: str) -> list[str]:
|
||||
"""Convert snake_case key to search keywords, filtering noise words."""
|
||||
noise = {"the", "a", "an", "and", "or", "of", "in", "to", "for",
|
||||
"is", "at", "by", "on", "it", "its", "from", "with",
|
||||
"guide", "section", "line", "ref", "reference", "api",
|
||||
"mode", "modes", "type", "types", "table", "example",
|
||||
"examples", "list", "advanced", "basic", "overview", "vs"}
|
||||
words = re.split(r"[_\-]", key.lower())
|
||||
return [w for w in words if w not in noise and len(w) > 2]
|
||||
|
||||
|
||||
def score_header(keywords: list[str], header_text: str) -> int:
|
||||
"""Score a header based on keyword matches (higher = better)."""
|
||||
header_lower = header_text.lower()
|
||||
score = 0
|
||||
for kw in keywords:
|
||||
if kw in header_lower:
|
||||
score += 1
|
||||
# Partial match bonus
|
||||
elif len(kw) > 4 and any(kw in w for w in header_lower.split()):
|
||||
score += 0.5
|
||||
return score
|
||||
|
||||
|
||||
def find_best_header(key: str, headers: list[tuple[int, str]],
|
||||
old_line: int) -> tuple[int | None, float, str]:
|
||||
"""Find best matching header. Returns (new_line, confidence, header_text)."""
|
||||
keywords = key_to_keywords(key)
|
||||
if not keywords:
|
||||
return None, 0.0, ""
|
||||
|
||||
best_score = 0
|
||||
best_line = None
|
||||
best_text = ""
|
||||
second_best = 0
|
||||
|
||||
for line_num, header_text in headers:
|
||||
score = score_header(keywords, header_text)
|
||||
if score > best_score:
|
||||
second_best = best_score
|
||||
best_score = score
|
||||
best_line = line_num
|
||||
best_text = header_text
|
||||
elif score > second_best:
|
||||
second_best = score
|
||||
|
||||
if best_score == 0:
|
||||
return None, 0.0, ""
|
||||
|
||||
# Confidence: high if best is clearly better than second best
|
||||
if best_score >= 2 and (second_best == 0 or best_score / max(second_best, 0.1) >= 2):
|
||||
confidence = min(1.0, best_score / max(len(keywords), 1))
|
||||
else:
|
||||
confidence = 0.4 * (best_score / max(len(keywords), 1))
|
||||
|
||||
return best_line, confidence, best_text
|
||||
|
||||
|
||||
def parse_yaml_line_refs(yaml_content: str) -> list[dict]:
|
||||
"""
|
||||
Parse reference.yaml and extract all line number references.
|
||||
Returns list of {key, file, old_line, yaml_line, raw_value}
|
||||
"""
|
||||
results = []
|
||||
in_main_guide_section = False
|
||||
lines = yaml_content.split("\n")
|
||||
|
||||
for i, line in enumerate(lines, 1):
|
||||
stripped = line.strip()
|
||||
if not stripped or stripped.startswith("#"):
|
||||
# Track if we're in the main guide section (bare integers = ultimate-guide.md)
|
||||
if "# Main guide (guide/ultimate-guide.md)" in stripped:
|
||||
in_main_guide_section = True
|
||||
elif stripped.startswith("#") and in_main_guide_section:
|
||||
# New major section — stop treating bare ints as ultimate-guide.md
|
||||
# But keep it true since the pattern continues after comments
|
||||
pass
|
||||
continue
|
||||
|
||||
# Pattern 1: key: "filepath:NNNN" or key: "filepath:NNNN" # comment
|
||||
m = re.match(r'^(\s*)(\S+):\s*"([^"]+):(\d+)"', line)
|
||||
if m:
|
||||
key = m.group(2).rstrip(":")
|
||||
filepath = m.group(3)
|
||||
line_num = int(m.group(4))
|
||||
results.append({
|
||||
"key": key,
|
||||
"file": filepath,
|
||||
"old_line": line_num,
|
||||
"yaml_line": i,
|
||||
"raw_value": f'"{filepath}:{line_num}"',
|
||||
"type": "string_ref",
|
||||
})
|
||||
continue
|
||||
|
||||
# Pattern 2: key: NNNN (bare integer, implies ultimate-guide.md if in that section)
|
||||
m = re.match(r'^(\s*)(\S+):\s*(\d{3,5})\s*(?:#.*)?$', line)
|
||||
if m:
|
||||
key = m.group(2).rstrip(":")
|
||||
line_num = int(m.group(3))
|
||||
# Heuristic: if line_num > 100 and key looks like a content reference
|
||||
# (not a count, score, year etc.)
|
||||
if line_num > 100 and not any(x in key for x in
|
||||
["count", "score", "stars", "year", "limit", "budget",
|
||||
"savings", "total", "ratio", "budget", "sizing"]):
|
||||
results.append({
|
||||
"key": key,
|
||||
"file": "guide/ultimate-guide.md",
|
||||
"old_line": line_num,
|
||||
"yaml_line": i,
|
||||
"raw_value": str(line_num),
|
||||
"type": "bare_int",
|
||||
})
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def validate_line(filepath: Path, line_num: int) -> str:
|
||||
"""Return the content at a given line (or error)."""
|
||||
try:
|
||||
with open(filepath, encoding="utf-8") as f:
|
||||
lines = f.readlines()
|
||||
if 1 <= line_num <= len(lines):
|
||||
return lines[line_num - 1].rstrip()
|
||||
return f"(line {line_num} out of range, file has {len(lines)} lines)"
|
||||
except FileNotFoundError:
|
||||
return "(file not found)"
|
||||
|
||||
|
||||
def is_sensible_content(key: str, content: str) -> bool:
|
||||
"""Quick check if content at old line is plausibly related to the key."""
|
||||
if not content or content.startswith("("):
|
||||
return False
|
||||
keywords = key_to_keywords(key)
|
||||
if not keywords:
|
||||
return True
|
||||
content_lower = content.lower()
|
||||
matches = sum(1 for kw in keywords if kw in content_lower)
|
||||
return matches >= max(1, len(keywords) // 2)
|
||||
|
||||
|
||||
def main():
|
||||
apply_fixes = "--apply" in sys.argv
|
||||
|
||||
print("Reading reference.yaml...")
|
||||
with open(YAML_FILE, encoding="utf-8") as f:
|
||||
yaml_content = f.read()
|
||||
|
||||
print("Building header indexes...")
|
||||
header_indexes = {}
|
||||
for rel_path in GUIDE_FILES:
|
||||
abs_path = REPO_ROOT / rel_path
|
||||
idx = build_header_index(abs_path)
|
||||
header_indexes[rel_path] = idx
|
||||
if idx:
|
||||
print(f" {rel_path}: {len(idx)} headers")
|
||||
else:
|
||||
print(f" {rel_path}: NOT FOUND or no headers")
|
||||
|
||||
print("\nParsing YAML references...")
|
||||
refs = parse_yaml_line_refs(yaml_content)
|
||||
print(f"Found {len(refs)} line number references")
|
||||
|
||||
report_lines = [
|
||||
"# Re-sync Report: machine-readable/reference.yaml",
|
||||
f"Generated: 2026-02-25",
|
||||
f"Total references scanned: {len(refs)}",
|
||||
"",
|
||||
]
|
||||
|
||||
corrections = [] # (yaml_line, old_value, new_value, key)
|
||||
stats = {"ok": 0, "high": 0, "medium": 0, "low": 0, "unknown": 0, "file_missing": 0}
|
||||
|
||||
ok_entries = []
|
||||
needs_fix = []
|
||||
|
||||
for ref in refs:
|
||||
key = ref["key"]
|
||||
rel_path = ref["file"]
|
||||
old_line = ref["old_line"]
|
||||
abs_path = REPO_ROOT / rel_path
|
||||
|
||||
current_content = validate_line(abs_path, old_line)
|
||||
headers = header_indexes.get(rel_path, [])
|
||||
|
||||
if not headers and not abs_path.exists():
|
||||
stats["file_missing"] += 1
|
||||
needs_fix.append({**ref, "status": "FILE_MISSING", "new_line": None,
|
||||
"confidence": 0, "current_content": current_content,
|
||||
"suggested_header": ""})
|
||||
continue
|
||||
|
||||
sensible = is_sensible_content(key, current_content)
|
||||
|
||||
if sensible:
|
||||
stats["ok"] += 1
|
||||
ok_entries.append({**ref, "current_content": current_content})
|
||||
else:
|
||||
new_line, confidence, header_text = find_best_header(key, headers, old_line)
|
||||
|
||||
if confidence >= 0.7:
|
||||
level = "HIGH"
|
||||
stats["high"] += 1
|
||||
elif confidence >= 0.4:
|
||||
level = "MEDIUM"
|
||||
stats["medium"] += 1
|
||||
elif new_line:
|
||||
level = "LOW"
|
||||
stats["low"] += 1
|
||||
else:
|
||||
level = "UNKNOWN"
|
||||
stats["unknown"] += 1
|
||||
|
||||
needs_fix.append({
|
||||
**ref,
|
||||
"status": level,
|
||||
"new_line": new_line,
|
||||
"confidence": confidence,
|
||||
"current_content": current_content,
|
||||
"suggested_header": header_text,
|
||||
})
|
||||
|
||||
if new_line and level in ("HIGH", "MEDIUM"):
|
||||
if ref["type"] == "string_ref":
|
||||
old_val = f'"{rel_path}:{old_line}"'
|
||||
new_val = f'"{rel_path}:{new_line}"'
|
||||
else:
|
||||
old_val = str(old_line)
|
||||
new_val = str(new_line)
|
||||
corrections.append((ref["yaml_line"], old_val, new_val, key))
|
||||
|
||||
# Build report
|
||||
report_lines += [
|
||||
"## Summary",
|
||||
"",
|
||||
f"| Status | Count |",
|
||||
f"|--------|-------|",
|
||||
f"| OK (content matches) | {stats['ok']} |",
|
||||
f"| HIGH confidence fix | {stats['high']} |",
|
||||
f"| MEDIUM confidence fix | {stats['medium']} |",
|
||||
f"| LOW confidence (manual review) | {stats['low']} |",
|
||||
f"| UNKNOWN (no match found) | {stats['unknown']} |",
|
||||
f"| FILE MISSING | {stats['file_missing']} |",
|
||||
f"| **Total** | **{len(refs)}** |",
|
||||
"",
|
||||
f"Auto-fixable (HIGH + MEDIUM): **{stats['high'] + stats['medium']}**",
|
||||
"",
|
||||
]
|
||||
|
||||
if needs_fix:
|
||||
report_lines += ["## Entries Needing Correction", ""]
|
||||
for entry in sorted(needs_fix, key=lambda x: (-x["confidence"], x["key"])):
|
||||
status = entry["status"]
|
||||
emoji = {"HIGH": "✅", "MEDIUM": "⚠️", "LOW": "🔶", "UNKNOWN": "❓", "FILE_MISSING": "🚫"}.get(status, "")
|
||||
report_lines.append(f"### {emoji} {entry['key']} ({status}, conf={entry['confidence']:.2f})")
|
||||
report_lines.append(f"- **File**: `{entry['file']}`")
|
||||
report_lines.append(f"- **Old line**: {entry['old_line']}")
|
||||
report_lines.append(f"- **Content at old line**: `{entry['current_content'][:100]}`")
|
||||
if entry["new_line"]:
|
||||
report_lines.append(f"- **Suggested line**: {entry['new_line']}")
|
||||
report_lines.append(f"- **Header found**: `{entry['suggested_header']}`")
|
||||
else:
|
||||
report_lines.append(f"- **Suggested line**: (not found — manual search needed)")
|
||||
report_lines.append("")
|
||||
|
||||
report_lines += ["## OK Entries (sample)", ""]
|
||||
for entry in ok_entries[:20]:
|
||||
report_lines.append(f"- `{entry['key']}`: line {entry['old_line']} → `{entry['current_content'][:80]}`")
|
||||
|
||||
report_content = "\n".join(report_lines)
|
||||
|
||||
# Write report
|
||||
REPORT_FILE.parent.mkdir(exist_ok=True)
|
||||
with open(REPORT_FILE, "w", encoding="utf-8") as f:
|
||||
f.write(report_content)
|
||||
print(f"\nReport saved to {REPORT_FILE}")
|
||||
|
||||
# Print summary
|
||||
print(f"\n{'='*60}")
|
||||
print(f"OK: {stats['ok']}")
|
||||
print(f"HIGH fix: {stats['high']}")
|
||||
print(f"MEDIUM fix: {stats['medium']}")
|
||||
print(f"LOW fix: {stats['low']}")
|
||||
print(f"UNKNOWN: {stats['unknown']}")
|
||||
print(f"FILE MISSING: {stats['file_missing']}")
|
||||
print(f"Auto-fixable: {stats['high'] + stats['medium']}")
|
||||
print(f"{'='*60}")
|
||||
|
||||
if corrections:
|
||||
print(f"\n{'='*60}")
|
||||
print("PROPOSED CORRECTIONS (HIGH + MEDIUM confidence):")
|
||||
print(f"{'='*60}")
|
||||
for yaml_line, old_val, new_val, key in corrections[:30]:
|
||||
print(f" Line {yaml_line:4d} | {key}")
|
||||
print(f" {old_val} → {new_val}")
|
||||
if len(corrections) > 30:
|
||||
print(f" ... and {len(corrections) - 30} more (see report)")
|
||||
|
||||
if apply_fixes and corrections:
|
||||
print(f"\nApplying {len(corrections)} fixes to reference.yaml...")
|
||||
content = yaml_content
|
||||
applied = 0
|
||||
for yaml_line_num, old_val, new_val, key in corrections:
|
||||
# Find the exact occurrence of old_val near the yaml_line_num
|
||||
# Use line-by-line replacement to be precise
|
||||
lines = content.split("\n")
|
||||
target_idx = yaml_line_num - 1
|
||||
if target_idx < len(lines) and old_val in lines[target_idx]:
|
||||
lines[target_idx] = lines[target_idx].replace(old_val, new_val, 1)
|
||||
applied += 1
|
||||
else:
|
||||
# Try ±2 lines
|
||||
for delta in [-1, 1, -2, 2]:
|
||||
idx = target_idx + delta
|
||||
if 0 <= idx < len(lines) and old_val in lines[idx]:
|
||||
lines[idx] = lines[idx].replace(old_val, new_val, 1)
|
||||
applied += 1
|
||||
break
|
||||
content = "\n".join(lines)
|
||||
|
||||
with open(YAML_FILE, "w", encoding="utf-8") as f:
|
||||
f.write(content)
|
||||
print(f"Applied {applied}/{len(corrections)} fixes.")
|
||||
print(f"Run without --apply to verify remaining issues.")
|
||||
elif not apply_fixes and corrections:
|
||||
print(f"\nRun with --apply to apply HIGH+MEDIUM confidence fixes automatically.")
|
||||
print(f"LOW and UNKNOWN confidence fixes require manual review (see report).")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue