feat(examples): add prompt caching test script

Standalone TypeScript script (zero deps, native fetch) to verify
Anthropic prompt caching is active. Documents 4 production gotchas
not in official docs: beta header required for Claude 4.x, effective
threshold ~2048+ tokens, cached tokens excluded from input_tokens,
new nested cache_creation response format.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Florian BRUNIAUX 2026-03-13 17:17:31 +01:00
parent b6ce1ef72f
commit aafb412fa4
3 changed files with 145 additions and 0 deletions

View file

@ -26,6 +26,7 @@ Utility scripts for Claude Code power users.
| `rtk-benchmark.sh` | Benchmark RTK token savings vs raw commands |
| `sync-claude-config.sh` | Sync Claude config files across machines |
| `sonnetplan.sh` | Run Claude with Sonnet replacing Opus (cost optimization alias) |
| `test-prompt-caching.ts` | Verify Anthropic prompt caching is active (no deps, fetch only) |
---

View file

@ -0,0 +1,132 @@
/**
* Prompt Caching Test Anthropic API
*
* Verifies that prompt caching is active on your Anthropic API key.
* Runs 3 identical calls and checks that calls 2-3 read from cache.
*
* Usage:
* ANTHROPIC_API_KEY=sk-ant-... npx tsx test-prompt-caching.ts
*
* Requirements:
* - Node 18+ (native fetch)
* - No dependencies required
*
* Gotchas discovered in production (not in official docs):
* 1. The `anthropic-beta: prompt-caching-2024-07-31` header is required
* even for Claude 4.x models omitting it silently disables caching.
* 2. The effective token threshold for Claude 4.x is ~2048+, not the
* documented 1024. Blocks below this threshold return write=0 with no warning.
* 3. Cached tokens are excluded from `input_tokens` in the response.
* Track `cache_creation_input_tokens` and `cache_read_input_tokens` instead.
* 4. The new API format exposes a nested `cache_creation` object with
* `ephemeral_5m_input_tokens` (5-min TTL) and `ephemeral_1h_input_tokens` (1-hour TTL).
*/
const API_KEY = process.env.ANTHROPIC_API_KEY;
if (!API_KEY) {
console.error("Error: ANTHROPIC_API_KEY environment variable is not set.");
process.exit(1);
}
// Stable system prompt — must exceed the effective cache threshold (~2048 tokens for Claude 4.x).
// In real usage: consolidate agent rules, platform context, and static instructions into one block.
const STABLE_SYSTEM_PROMPT = `
You are a helpful assistant.
Core rules:
- Be concise and accurate
- Cite sources when referencing external content
- Match the language of the user
Extended context (padding to exceed cache threshold):
${Array(200).fill("This platform helps users collaborate on tasks, track progress, and organize their workflow efficiently.").join(" ")}
`.trim();
type Usage = {
input_tokens: number;
output_tokens: number;
cache_creation_input_tokens?: number;
cache_read_input_tokens?: number;
cache_creation?: {
ephemeral_5m_input_tokens?: number;
ephemeral_1h_input_tokens?: number;
};
};
async function callAPI(callIndex: number): Promise<Usage> {
const response = await fetch("https://api.anthropic.com/v1/messages", {
method: "POST",
headers: {
"x-api-key": API_KEY!,
"anthropic-version": "2023-06-01",
// Required for all models, including Claude 4.x
"anthropic-beta": "prompt-caching-2024-07-31",
"content-type": "application/json",
},
body: JSON.stringify({
model: "claude-sonnet-4-6",
max_tokens: 50,
system: [
{
type: "text",
text: STABLE_SYSTEM_PROMPT,
// Mark this block for caching — must be the last or only system block
cache_control: { type: "ephemeral" },
},
],
messages: [
{ role: "user", content: `Test call ${callIndex} — reply OK` },
],
}),
});
if (!response.ok) {
const body = await response.text();
throw new Error(`API ${response.status}: ${body}`);
}
const data = (await response.json()) as { usage: Usage };
return data.usage;
}
async function main() {
console.log("Prompt Caching Test\n");
let passed = true;
for (let i = 1; i <= 3; i++) {
const usage = await callAPI(i);
const write = usage.cache_creation_input_tokens ?? 0;
const read = usage.cache_read_input_tokens ?? 0;
let status: string;
if (i === 1) {
status = write > 0 ? "cache written" : "❌ write failed (check token threshold)";
if (write === 0) passed = false;
} else {
status = read > 0 ? "✅ cache hit" : "❌ cache miss";
if (read === 0) passed = false;
}
console.log(
`Call ${i}: write=${String(write).padStart(5)} read=${String(read).padStart(5)} input=${String(usage.input_tokens).padStart(4)}${status}`
);
}
console.log();
if (passed) {
console.log("Result: caching is working correctly.");
} else {
console.log("Result: caching is NOT working. Common causes:");
console.log(" - Token threshold not met (try increasing STABLE_SYSTEM_PROMPT)");
console.log(" - Missing beta header (anthropic-beta: prompt-caching-2024-07-31)");
console.log(" - Account tier does not support caching");
process.exit(1);
}
}
main().catch((err) => {
console.error(err.message);
process.exit(1);
});