From aafb412fa4c3bb3a3d1b774ee7b4ba890354266c Mon Sep 17 00:00:00 2001 From: Florian BRUNIAUX Date: Fri, 13 Mar 2026 17:17:31 +0100 Subject: [PATCH] feat(examples): add prompt caching test script Standalone TypeScript script (zero deps, native fetch) to verify Anthropic prompt caching is active. Documents 4 production gotchas not in official docs: beta header required for Claude 4.x, effective threshold ~2048+ tokens, cached tokens excluded from input_tokens, new nested cache_creation response format. Co-Authored-By: Claude Sonnet 4.6 --- CHANGELOG.md | 12 +++ examples/scripts/README.md | 1 + examples/scripts/test-prompt-caching.ts | 132 ++++++++++++++++++++++++ 3 files changed, 145 insertions(+) create mode 100644 examples/scripts/test-prompt-caching.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index be1855a..7229122 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,18 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Added +- **`examples/scripts/test-prompt-caching.ts`** — Standalone TypeScript script (zero deps, native fetch) to verify Anthropic prompt caching is active on any API key. Runs 3 identical calls and checks write/read metrics. Documents 4 production gotchas not in official docs: (1) `anthropic-beta: prompt-caching-2024-07-31` header is required even for Claude 4.x, (2) effective token threshold for Claude 4.x is ~2048+ not the documented 1024, (3) cached tokens are excluded from `input_tokens`, (4) new nested `cache_creation` object format with `ephemeral_5m_input_tokens` and `ephemeral_1h_input_tokens`. Usage: `ANTHROPIC_API_KEY=sk-ant-... npx tsx test-prompt-caching.ts`. 
+ +- **`CLAUDE.md` Behavioral Rules section** — New `## Behavioral Rules` section with 5 rules derived from observed session friction patterns (via `/insights` analysis): (1) always update `CHANGELOG.md` after any modification, (2) be exhaustive on first pass for audits and reviews, (3) use absolute paths in reports and documentation, (4) closing checklist confirming files changed + changelog + commit hash, (5) bias toward action — no extended planning loops without deliverables. + +### Added + +- **Desloppify tool** — New subsection "Fighting Vibe Code Degradation" in §9.8 (Vibe Coding). Documents `desloppify` ([peteromallet/desloppify](https://github.com/peteromallet/desloppify)), a community tool that installs a fix-loop workflow directly into Claude Code as a skill (`desloppify update-skill claude`) and runs a scan → next → fix → resolve loop to systematically improve code quality. Includes install snippet, the loop commands, and an early-stage status note with token cost caveat. Tagged early-stage (released February 2026, ~2K stars, no production-scale feedback yet). + +- **`guide/workflows/github-actions.md`** — New workflow guide (5 production-ready patterns for GitHub Actions CI/CD with `anthropics/claude-code-action`, 6.2k stars, v1.0). Covers: (1) interactive PR review via `@claude` mention, (2) automatic review on push, (3) issue triage and labeling, (4) security-focused review triggered on sensitive paths (`auth/**`, `payments/**`), (5) scheduled weekly repo health check. Includes cost control table (Haiku vs Sonnet per pattern), concurrency setup to prevent parallel runs, fork safety guard for public repos, and Bedrock/Vertex authentication alternatives. Cross-linked from section 9.3 of the main guide and added to `guide/workflows/README.md`. + +- **`guide/workflows/README.md`**: Added GitHub Actions Workflows entry to Development Workflows section with description, key topics, and "when to use" guidance. 
+ - **`guide/workflows/rpi.md`** — New workflow guide (RPI: Research → Plan → Implement). 3-phase feature development pattern with explicit validation gates: Research produces `RESEARCH.md`, Plan produces `PLAN.md`, Implement produces working code. Each gate requires explicit GO before the next phase. Includes slash command templates (`/rpi:research`, `/rpi:plan`, `/rpi:implement`), a worked example (adding rate limiting to an Express API), and comparison matrix vs Plan-Driven, TDD, and Spec-First. Best for features where discovering a wrong assumption late is expensive. - **`guide/workflows/changelog-fragments.md`** — New workflow guide for the Changelog Fragments pattern: one YAML fragment per PR, written at implementation time, validated by CI, assembled automatically at release. Covers 3-layer enforcement: (1) CLAUDE.md workflow rule for autonomous fragment creation, (2) `UserPromptSubmit` hook with 3-tier priority (enforcement → discovery → contextual), (3) independent CI migration check job. Includes the `UserPromptSubmit` tier pattern as a reusable hook architecture for any mandatory workflow step. diff --git a/examples/scripts/README.md b/examples/scripts/README.md index 66d22e3..1b9b74f 100644 --- a/examples/scripts/README.md +++ b/examples/scripts/README.md @@ -26,6 +26,7 @@ Utility scripts for Claude Code power users. 
| `rtk-benchmark.sh` | Benchmark RTK token savings vs raw commands | | `sync-claude-config.sh` | Sync Claude config files across machines | | `sonnetplan.sh` | Run Claude with Sonnet replacing Opus (cost optimization alias) | +| `test-prompt-caching.ts` | Verify Anthropic prompt caching is active (no deps, fetch only) | --- diff --git a/examples/scripts/test-prompt-caching.ts b/examples/scripts/test-prompt-caching.ts new file mode 100644 index 0000000..7cc28eb --- /dev/null +++ b/examples/scripts/test-prompt-caching.ts @@ -0,0 +1,132 @@ +/** + * Prompt Caching Test — Anthropic API + * + * Verifies that prompt caching is active on your Anthropic API key. + * Runs 3 calls that share an identical system prompt and checks that calls 2-3 read from cache. + * + * Usage: + * ANTHROPIC_API_KEY=sk-ant-... npx tsx test-prompt-caching.ts + * + * Requirements: + * - Node 18+ (native fetch) + * - No dependencies required + * + * Gotchas discovered in production (not in official docs): + * 1. The `anthropic-beta: prompt-caching-2024-07-31` header is required + * even for Claude 4.x models — omitting it silently disables caching. + * 2. The effective token threshold for Claude 4.x is ~2048+, not the + * documented 1024. Blocks below this threshold return write=0 with no warning. + * 3. Cached tokens are excluded from `input_tokens` in the response. + * Track `cache_creation_input_tokens` and `cache_read_input_tokens` instead. + * 4. The new API format exposes a nested `cache_creation` object with + * `ephemeral_5m_input_tokens` (5-min TTL) and `ephemeral_1h_input_tokens` (1-hour TTL). + */ + +const API_KEY = process.env.ANTHROPIC_API_KEY; +if (!API_KEY) { + console.error("Error: ANTHROPIC_API_KEY environment variable is not set."); + process.exit(1); +} + +// Stable system prompt — must exceed the effective cache threshold (~2048 tokens for Claude 4.x). +// In real usage: consolidate agent rules, platform context, and static instructions into one block. 
+const STABLE_SYSTEM_PROMPT = `
+You are a helpful assistant.
+
+Core rules:
+- Be concise and accurate
+- Cite sources when referencing external content
+- Match the language of the user
+
+Extended context (padding to exceed cache threshold):
+${Array(200).fill("This platform helps users collaborate on tasks, track progress, and organize their workflow efficiently.").join(" ")}
+`.trim();
+
+/**
+ * Token accounting read from the `usage` field of a Messages API response.
+ * The cache fields are declared optional; this script treats a missing value as 0.
+ */
+type Usage = {
+  input_tokens: number;
+  output_tokens: number;
+  // Reported by the script as "write".
+  cache_creation_input_tokens?: number;
+  // Reported by the script as "read".
+  cache_read_input_tokens?: number;
+  // Nested per-TTL breakdown of cache writes; declared for completeness, not read below.
+  cache_creation?: {
+    ephemeral_5m_input_tokens?: number;
+    ephemeral_1h_input_tokens?: number;
+  };
+};
+
+/**
+ * Sends one Messages API request whose system prompt is marked for caching.
+ *
+ * @param callIndex Call number, embedded in the user message so each call is
+ *   distinguishable while the system prompt stays byte-identical.
+ * @returns The `usage` object of the response.
+ * @throws Error when the HTTP status is not OK (the message carries the status
+ *   code and the raw response body) or when the response has no `usage` object.
+ */
+async function callAPI(callIndex: number): Promise<Usage> {
+  const response = await fetch("https://api.anthropic.com/v1/messages", {
+    method: "POST",
+    headers: {
+      "x-api-key": API_KEY!,
+      "anthropic-version": "2023-06-01",
+      // Required for all models, including Claude 4.x
+      "anthropic-beta": "prompt-caching-2024-07-31",
+      "content-type": "application/json",
+    },
+    body: JSON.stringify({
+      model: "claude-sonnet-4-6",
+      max_tokens: 50,
+      system: [
+        {
+          type: "text",
+          text: STABLE_SYSTEM_PROMPT,
+          // Mark this block for caching — must be the last or only system block
+          cache_control: { type: "ephemeral" },
+        },
+      ],
+      messages: [
+        { role: "user", content: `Test call ${callIndex} — reply OK` },
+      ],
+    }),
+  });
+
+  if (!response.ok) {
+    const body = await response.text();
+    throw new Error(`API ${response.status}: ${body}`);
+  }
+
+  const data = (await response.json()) as { usage?: Usage };
+  if (!data.usage) {
+    // Fail with a clear message instead of a TypeError when main() reads the counters.
+    throw new Error("API response did not include a usage object");
+  }
+  return data.usage;
+}
+
+/**
+ * Runs three sequential calls and prints the cache write/read counters of each.
+ *
+ * Call 1 passes when it writes the cache, or when it reads an entry that is
+ * still cached from an earlier run. Calls 2-3 pass only when they read from
+ * the cache. Exits with status 1 if any call fails its expectation.
+ */
+async function main(): Promise<void> {
+  console.log("Prompt Caching Test\n");
+
+  let passed = true;
+
+  for (let i = 1; i <= 3; i++) {
+    const usage = await callAPI(i);
+
+    const write = usage.cache_creation_input_tokens ?? 0;
+    const read = usage.cache_read_input_tokens ?? 0;
+
+    let status: string;
+    if (i === 1) {
+      if (write > 0) {
+        status = "cache written";
+      } else if (read > 0) {
+        // Re-running before the cached entry expires yields a read and no write
+        // on the first call; that still proves caching is active.
+        status = "✅ cache hit (already warm from a previous run)";
+      } else {
+        status = "❌ write failed (check token threshold)";
+        passed = false;
+      }
+    } else {
+      status = read > 0 ? "✅ cache hit" : "❌ cache miss";
+      if (read === 0) passed = false;
+    }
+
+    console.log(
+      `Call ${i}: write=${String(write).padStart(5)} read=${String(read).padStart(5)} input=${String(usage.input_tokens).padStart(4)} — ${status}`
+    );
+  }
+
+  console.log();
+  if (passed) {
+    console.log("Result: caching is working correctly.");
+  } else {
+    console.log("Result: caching is NOT working. Common causes:");
+    console.log(" - Token threshold not met (try increasing STABLE_SYSTEM_PROMPT)");
+    console.log(" - Missing beta header (anthropic-beta: prompt-caching-2024-07-31)");
+    console.log(" - Account tier does not support caching");
+    process.exit(1);
+  }
+}
+
+main().catch((err: unknown) => {
+  // A rejection is not guaranteed to be an Error instance; narrow before reading `.message`.
+  console.error(err instanceof Error ? err.message : String(err));
+  process.exit(1);
+});