fix(agent): increase token estimation safety margins
- ESTIMATION_SAFETY_MARGIN: 1.2 → 1.5 (50% buffer covers CJK text)
- estimateSystemPromptTokens: /3 → /2 (conservative for mixed content)

This makes the 80% compaction trigger fire earlier, reducing the gap between estimated and actual token counts that caused overflow errors.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
f8ca4ca95e
commit
b13d40c6da
3 changed files with 23 additions and 23 deletions
|
|
@ -37,7 +37,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => ({
|
|||
describe("token-estimation", () => {
|
||||
describe("constants", () => {
|
||||
it("should have correct safety margin", () => {
|
||||
expect(ESTIMATION_SAFETY_MARGIN).toBe(1.2);
|
||||
expect(ESTIMATION_SAFETY_MARGIN).toBe(1.5);
|
||||
});
|
||||
|
||||
it("should have correct compaction trigger ratio", () => {
|
||||
|
|
@ -63,20 +63,20 @@ describe("token-estimation", () => {
|
|||
});
|
||||
|
||||
it("should estimate tokens based on character count", () => {
|
||||
// ~3 chars per token
|
||||
expect(estimateSystemPromptTokens("abc")).toBe(1);
|
||||
expect(estimateSystemPromptTokens("abcdef")).toBe(2);
|
||||
expect(estimateSystemPromptTokens("abcdefghi")).toBe(3);
|
||||
// ~2 chars per token (conservative for CJK/mixed content)
|
||||
expect(estimateSystemPromptTokens("ab")).toBe(1);
|
||||
expect(estimateSystemPromptTokens("abcd")).toBe(2);
|
||||
expect(estimateSystemPromptTokens("abcdef")).toBe(3);
|
||||
});
|
||||
|
||||
it("should ceil the result", () => {
|
||||
// 4 chars / 3 = 1.33, should ceil to 2
|
||||
expect(estimateSystemPromptTokens("abcd")).toBe(2);
|
||||
// 3 chars / 2 = 1.5, should ceil to 2
|
||||
expect(estimateSystemPromptTokens("abc")).toBe(2);
|
||||
});
|
||||
|
||||
it("should handle long prompts", () => {
|
||||
const longPrompt = "a".repeat(3000);
|
||||
expect(estimateSystemPromptTokens(longPrompt)).toBe(1000);
|
||||
expect(estimateSystemPromptTokens(longPrompt)).toBe(1500);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -140,7 +140,7 @@ describe("token-estimation", () => {
|
|||
reserveTokens: 0,
|
||||
});
|
||||
|
||||
// Utilization = (tokens * 1.2) / available
|
||||
// Utilization = (tokens * 1.5) / available
|
||||
expect(result.utilizationRatio).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
|
@ -292,26 +292,26 @@ describe("token-estimation", () => {
|
|||
content: "x".repeat(400), // ~100 tokens
|
||||
} as AgentMessage;
|
||||
|
||||
// With safety margin 1.2, 100 * 1.2 = 120 tokens
|
||||
// 120 > 1000 * 0.1 = 100, so oversized
|
||||
// With safety margin 1.5, 100 * 1.5 = 150 tokens
|
||||
// 150 > 1000 * 0.1 = 100, so oversized
|
||||
expect(isMessageOversized(message, 1000, 0.1)).toBe(true);
|
||||
|
||||
// 120 < 1000 * 0.2 = 200, so not oversized
|
||||
// 150 < 1000 * 0.2 = 200, so not oversized
|
||||
expect(isMessageOversized(message, 1000, 0.2)).toBe(false);
|
||||
});
|
||||
|
||||
it("should apply safety margin to token count", () => {
|
||||
const message = {
|
||||
role: "user",
|
||||
content: "x".repeat(400), // ~100 tokens, with margin ~120
|
||||
content: "x".repeat(400), // ~100 tokens, with margin ~150
|
||||
} as AgentMessage;
|
||||
|
||||
// Without margin: 100 < 250 (50% of 500)
|
||||
// With margin: 120 < 250, still ok
|
||||
// With margin: 150 < 250, still ok
|
||||
expect(isMessageOversized(message, 500, 0.5)).toBe(false);
|
||||
|
||||
// Without margin: 100 > 100 would be false (not oversized)
|
||||
// With margin: 120 > 100, should be true
|
||||
// With margin: 150 > 100, should be true
|
||||
expect(isMessageOversized(message, 200, 0.5)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent";
|
|||
import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js";
|
||||
|
||||
/** Safety margin coefficient to compensate for estimation inaccuracy */
|
||||
export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer
|
||||
export const ESTIMATION_SAFETY_MARGIN = 1.5; // 50% buffer (covers CJK and mixed content)
|
||||
|
||||
/** Utilization threshold for triggering compaction */
|
||||
export const COMPACTION_TRIGGER_RATIO = 0.8; // 80%
|
||||
|
|
@ -32,10 +32,10 @@ export function estimateMessagesTokens(messages: AgentMessage[]): number {
|
|||
*/
|
||||
export function estimateSystemPromptTokens(systemPrompt: string | undefined): number {
|
||||
if (!systemPrompt) return 0;
|
||||
// Simple estimation: ~4 chars = 1 token (for English/code mixed text)
|
||||
// Chinese ~2 chars = 1 token
|
||||
// Average value of 3
|
||||
return Math.ceil(systemPrompt.length / 3);
|
||||
// Conservative estimation: ~2 chars = 1 token
|
||||
// English/code averages ~4 chars/token but CJK averages ~1-2 chars/token.
|
||||
// Using /2 as a safe default to prevent underestimation on mixed content.
|
||||
return Math.ceil(systemPrompt.length / 2);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ vi.mock("../context-window/index.js", async () => {
|
|||
const systemPromptTokens = params.systemPrompt ? 100 : 0;
|
||||
const reserve = params.reserveTokens ?? 1024;
|
||||
const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve);
|
||||
const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.2) / availableTokens : 1;
|
||||
const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1;
|
||||
|
||||
return {
|
||||
messageTokens,
|
||||
|
|
@ -234,7 +234,7 @@ describe("compaction", () => {
|
|||
// 100 * 10 = 1000 message tokens
|
||||
// System: 100 tokens, Reserve: 1024
|
||||
// Available: 2000 - 100 - 1024 = 876
|
||||
// Utilization: (1000 * 1.2) / 876 = 1.37 > 0.8
|
||||
// Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8
|
||||
const result = compactMessages(messages, {
|
||||
mode: "tokens",
|
||||
contextWindowTokens: 2000,
|
||||
|
|
@ -249,7 +249,7 @@ describe("compaction", () => {
|
|||
const messages = createMessages(5);
|
||||
// 5 * 10 = 50 message tokens
|
||||
// Available: 10000 - 100 - 1024 = 8876
|
||||
// Utilization: (50 * 1.2) / 8876 = 0.007 < 0.8
|
||||
// Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8
|
||||
const result = compactMessages(messages, {
|
||||
mode: "tokens",
|
||||
contextWindowTokens: 10000,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue