diff --git a/packages/core/src/agent/context-window/token-estimation.test.ts b/packages/core/src/agent/context-window/token-estimation.test.ts index cef9c54c..749097ef 100644 --- a/packages/core/src/agent/context-window/token-estimation.test.ts +++ b/packages/core/src/agent/context-window/token-estimation.test.ts @@ -37,7 +37,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => ({ describe("token-estimation", () => { describe("constants", () => { it("should have correct safety margin", () => { - expect(ESTIMATION_SAFETY_MARGIN).toBe(1.2); + expect(ESTIMATION_SAFETY_MARGIN).toBe(1.5); }); it("should have correct compaction trigger ratio", () => { @@ -63,20 +63,20 @@ describe("token-estimation", () => { }); it("should estimate tokens based on character count", () => { - // ~3 chars per token - expect(estimateSystemPromptTokens("abc")).toBe(1); - expect(estimateSystemPromptTokens("abcdef")).toBe(2); - expect(estimateSystemPromptTokens("abcdefghi")).toBe(3); + // ~2 chars per token (conservative for CJK/mixed content) + expect(estimateSystemPromptTokens("ab")).toBe(1); + expect(estimateSystemPromptTokens("abcd")).toBe(2); + expect(estimateSystemPromptTokens("abcdef")).toBe(3); }); it("should ceil the result", () => { - // 4 chars / 3 = 1.33, should ceil to 2 - expect(estimateSystemPromptTokens("abcd")).toBe(2); + // 3 chars / 2 = 1.5, should ceil to 2 + expect(estimateSystemPromptTokens("abc")).toBe(2); }); it("should handle long prompts", () => { const longPrompt = "a".repeat(3000); - expect(estimateSystemPromptTokens(longPrompt)).toBe(1000); + expect(estimateSystemPromptTokens(longPrompt)).toBe(1500); }); }); @@ -140,7 +140,7 @@ describe("token-estimation", () => { reserveTokens: 0, }); - // Utilization = (tokens * 1.2) / available + // Utilization = (tokens * 1.5) / available expect(result.utilizationRatio).toBeGreaterThan(0); }); }); @@ -292,26 +292,26 @@ describe("token-estimation", () => { content: "x".repeat(400), // ~100 tokens } as AgentMessage; - // With safety margin 1.2, 100 * 1.2 = 120 tokens - // 120 > 1000 * 0.1 = 100, so oversized + // With safety margin 1.5, 100 * 1.5 = 150 tokens + // 150 > 1000 * 0.1 = 100, so oversized expect(isMessageOversized(message, 1000, 0.1)).toBe(true); - // 120 < 1000 * 0.2 = 200, so not oversized + // 150 < 1000 * 0.2 = 200, so not oversized expect(isMessageOversized(message, 1000, 0.2)).toBe(false); }); it("should apply safety margin to token count", () => { const message = { role: "user", - content: "x".repeat(400), // ~100 tokens, with margin ~120 + content: "x".repeat(400), // ~100 tokens, with margin ~150 } as AgentMessage; // Without margin: 100 < 250 (50% of 500) - // With margin: 120 < 250, still ok + // With margin: 150 < 250, still ok expect(isMessageOversized(message, 500, 0.5)).toBe(false); // Without margin: 100 < 100 would be false - // With margin: 120 > 100, should be true + // With margin: 150 > 100, should be true expect(isMessageOversized(message, 200, 0.5)).toBe(true); }); }); diff --git a/packages/core/src/agent/context-window/token-estimation.ts b/packages/core/src/agent/context-window/token-estimation.ts index 7899b050..26524f67 100644 --- a/packages/core/src/agent/context-window/token-estimation.ts +++ b/packages/core/src/agent/context-window/token-estimation.ts @@ -9,7 +9,7 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent"; import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js"; /** Safety margin coefficient to compensate for estimation inaccuracy */ -export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer +export const ESTIMATION_SAFETY_MARGIN = 1.5; // 50% buffer (covers CJK and mixed content) /** Utilization threshold for triggering compaction */ export const COMPACTION_TRIGGER_RATIO = 0.8; // 80% @@ -32,10 +32,10 @@ export function estimateMessagesTokens(messages: AgentMessage[]): number { */ export function estimateSystemPromptTokens(systemPrompt: string | undefined): number { if (!systemPrompt) return 0; - // Simple estimation: ~4 chars = 1 token (for English/code mixed text) - // Chinese ~2 chars = 1 token - // Average value of 3 - return Math.ceil(systemPrompt.length / 3); + // Conservative estimation: ~2 chars = 1 token + // English/code averages ~4 chars/token but CJK averages ~1-2 chars/token. + // Using /2 as a safe default to prevent underestimation on mixed content. + return Math.ceil(systemPrompt.length / 2); } /** diff --git a/packages/core/src/agent/session/compaction.test.ts b/packages/core/src/agent/session/compaction.test.ts index 124b649f..3529b59d 100644 --- a/packages/core/src/agent/session/compaction.test.ts +++ b/packages/core/src/agent/session/compaction.test.ts @@ -44,7 +44,7 @@ vi.mock("../context-window/index.js", async () => { const systemPromptTokens = params.systemPrompt ? 100 : 0; const reserve = params.reserveTokens ?? 1024; const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve); - const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.2) / availableTokens : 1; + const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1; return { messageTokens, @@ -234,7 +234,7 @@ describe("compaction", () => { // 100 * 10 = 1000 message tokens // System: 100 tokens, Reserve: 1024 // Available: 2000 - 100 - 1024 = 876 - // Utilization: (1000 * 1.2) / 876 = 1.37 > 0.8 + // Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8 const result = compactMessages(messages, { mode: "tokens", contextWindowTokens: 2000, @@ -249,7 +249,7 @@ describe("compaction", () => { const messages = createMessages(5); // 5 * 10 = 50 message tokens // Available: 10000 - 100 - 1024 = 8876 - // Utilization: (50 * 1.2) / 8876 = 0.007 < 0.8 + // Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8 const result = compactMessages(messages, { mode: "tokens", contextWindowTokens: 10000,