diff --git a/packages/core/src/agent/context-window/token-estimation.test.ts b/packages/core/src/agent/context-window/token-estimation.test.ts index cef9c54c..749097ef 100644 --- a/packages/core/src/agent/context-window/token-estimation.test.ts +++ b/packages/core/src/agent/context-window/token-estimation.test.ts @@ -37,7 +37,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => ({ describe("token-estimation", () => { describe("constants", () => { it("should have correct safety margin", () => { - expect(ESTIMATION_SAFETY_MARGIN).toBe(1.2); + expect(ESTIMATION_SAFETY_MARGIN).toBe(1.5); }); it("should have correct compaction trigger ratio", () => { @@ -63,20 +63,20 @@ describe("token-estimation", () => { }); it("should estimate tokens based on character count", () => { - // ~3 chars per token - expect(estimateSystemPromptTokens("abc")).toBe(1); - expect(estimateSystemPromptTokens("abcdef")).toBe(2); - expect(estimateSystemPromptTokens("abcdefghi")).toBe(3); + // ~2 chars per token (conservative for CJK/mixed content) + expect(estimateSystemPromptTokens("ab")).toBe(1); + expect(estimateSystemPromptTokens("abcd")).toBe(2); + expect(estimateSystemPromptTokens("abcdef")).toBe(3); }); it("should ceil the result", () => { - // 4 chars / 3 = 1.33, should ceil to 2 - expect(estimateSystemPromptTokens("abcd")).toBe(2); + // 3 chars / 2 = 1.5, should ceil to 2 + expect(estimateSystemPromptTokens("abc")).toBe(2); }); it("should handle long prompts", () => { const longPrompt = "a".repeat(3000); - expect(estimateSystemPromptTokens(longPrompt)).toBe(1000); + expect(estimateSystemPromptTokens(longPrompt)).toBe(1500); }); }); @@ -140,7 +140,7 @@ describe("token-estimation", () => { reserveTokens: 0, }); - // Utilization = (tokens * 1.2) / available + // Utilization = (tokens * 1.5) / available expect(result.utilizationRatio).toBeGreaterThan(0); }); }); @@ -292,26 +292,26 @@ describe("token-estimation", () => { content: "x".repeat(400), // ~100 tokens } as AgentMessage; - // With safety margin 1.2, 100 * 1.2 = 120 tokens - // 120 > 1000 * 0.1 = 100, so oversized + // With safety margin 1.5, 100 * 1.5 = 150 tokens + // 150 > 1000 * 0.1 = 100, so oversized expect(isMessageOversized(message, 1000, 0.1)).toBe(true); - // 120 < 1000 * 0.2 = 200, so not oversized + // 150 < 1000 * 0.2 = 200, so not oversized expect(isMessageOversized(message, 1000, 0.2)).toBe(false); }); it("should apply safety margin to token count", () => { const message = { role: "user", - content: "x".repeat(400), // ~100 tokens, with margin ~120 + content: "x".repeat(400), // ~100 tokens, with margin ~150 } as AgentMessage; // Without margin: 100 < 250 (50% of 500) - // With margin: 120 < 250, still ok + // With margin: 150 < 250, still ok expect(isMessageOversized(message, 500, 0.5)).toBe(false); // Without margin: 100 < 100 would be false - // With margin: 120 > 100, should be true + // With margin: 150 > 100, should be true expect(isMessageOversized(message, 200, 0.5)).toBe(true); }); }); diff --git a/packages/core/src/agent/context-window/token-estimation.ts b/packages/core/src/agent/context-window/token-estimation.ts index 7899b050..26524f67 100644 --- a/packages/core/src/agent/context-window/token-estimation.ts +++ b/packages/core/src/agent/context-window/token-estimation.ts @@ -9,7 +9,7 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent"; import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js"; /** Safety margin coefficient to compensate for estimation inaccuracy */ -export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer +export const ESTIMATION_SAFETY_MARGIN = 1.5; // 50% buffer (covers CJK and mixed content) /** Utilization threshold for triggering compaction */ export const COMPACTION_TRIGGER_RATIO = 0.8; // 80% @@ -32,10 +32,10 @@ export function estimateMessagesTokens(messages: AgentMessage[]): number { */ export function estimateSystemPromptTokens(systemPrompt: string | undefined): number { if (!systemPrompt) return 0; - // Simple estimation: ~4 chars = 1 token (for English/code mixed text) - // Chinese ~2 chars = 1 token - // Average value of 3 - return Math.ceil(systemPrompt.length / 3); + // Conservative estimation: ~2 chars = 1 token + // English/code averages ~4 chars/token but CJK averages ~1-2 chars/token. + // Using /2 as a safe default to prevent underestimation on mixed content. + return Math.ceil(systemPrompt.length / 2); } /** diff --git a/packages/core/src/agent/errors.ts b/packages/core/src/agent/errors.ts new file mode 100644 index 00000000..7f51bfce --- /dev/null +++ b/packages/core/src/agent/errors.ts @@ -0,0 +1,21 @@ +/** + * Error classification utilities for agent error handling. + */ + +/** + * Check if an error is a context overflow / "prompt too long" error from any LLM provider. + * + * These errors indicate the request exceeded the model's context window and should + * trigger auto-compaction rather than auth profile rotation. + */ +export function isContextOverflowError(error: unknown): boolean { + const msg = (error instanceof Error ? error.message : String(error)).toLowerCase(); + return ( + msg.includes("prompt is too long") || + msg.includes("context length exceeded") || + msg.includes("maximum context length") || + msg.includes("request_too_large") || + msg.includes("request size exceeds") || + (msg.includes("413") && msg.includes("too large")) + ); +} diff --git a/packages/core/src/agent/runner.ts b/packages/core/src/agent/runner.ts index 81b7fc26..105eee43 100644 --- a/packages/core/src/agent/runner.ts +++ b/packages/core/src/agent/runner.ts @@ -22,7 +22,14 @@ import { checkContextWindow, DEFAULT_CONTEXT_TOKENS, type ContextWindowGuardResult, + estimateTokenUsage, + COMPACTION_TRIGGER_RATIO, + compactMessagesTokenAware, + MIN_KEEP_MESSAGES, } from "./context-window/index.js"; +import { + pruneToolResults, +} from "./context-window/tool-result-pruning.js"; import { mergeToolsConfig, type ToolsConfig } from "./tools/policy.js"; import { loadAuthProfileStore, @@ -42,6 +49,7 @@ import { sanitizeToolCallInputs, sanitizeToolUseResultPairing, } from "./session/session-transcript-repair.js"; +import { isContextOverflowError } from "./errors.js"; // ============================================================ // Error classification for auth profile rotation @@ -89,11 +97,15 @@ export class Agent { private readonly stderr: NodeJS.WritableStream; private initialized = false; + // Context window settings (for pre-flight compaction) + private readonly reserveTokens: number; + // Internal run state private _internalRun = false; private _isRunning = false; private _aborted = false; private _runMutex: Promise = Promise.resolve(); + private _compactionPromise: Promise = Promise.resolve(); private currentUserDisplayPrompt: string | undefined; // MulticaEvent subscribers (parallel to PiAgentCore's subscriber list) @@ -188,8 +200,10 @@ export class Agent { return this.currentApiKey; }, transformContext: async (messages) => { - const sanitizedInputs = sanitizeToolCallInputs(messages); - return sanitizeToolUseResultPairing(sanitizedInputs); + let result = sanitizeToolCallInputs(messages); + result = sanitizeToolUseResultPairing(result); + result = this.preflightCompact(result); + return result; }, }); @@ -260,6 +274,9 @@ export class Agent { ? resolveApiKey(this.resolvedProvider, options.apiKey) : undefined; + // Store reserveTokens for pre-flight compaction + this.reserveTokens = options.reserveTokens ?? 1024; + // 创建 SessionManager(带 context window 配置) this.session = new SessionManager({ sessionId: this.sessionId, @@ -425,6 +442,8 @@ export class Agent { prompt: string, options?: { displayPrompt?: string }, ): Promise { + // Wait for any in-flight compaction from the previous run + await this._compactionPromise; await this.ensureInitialized(); this.refreshAuthState(); this.output.state.lastAssistantText = ""; @@ -444,6 +463,9 @@ export class Agent { const canRotate = !this.pinnedProfile && this.profileCandidates.length > 1; let lastError: unknown; + const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 2; + let overflowAttempts = 0; + // Loop to exhaust all candidate profiles on rotatable errors while (true) { try { @@ -452,6 +474,34 @@ export class Agent { } catch (error) { lastError = error; + // Context overflow recovery: auto-compact and retry before trying auth rotation + if (isContextOverflowError(error) && overflowAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS) { + overflowAttempts++; + this.stderr.write( + `[context-overflow] Overflow detected (attempt ${overflowAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}), compacting...\n`, + ); + const messages = this.agent.state.messages.slice(); + const result = await this.session.maybeCompact(messages); + if (result?.kept) { + this.agent.replaceMessages(result.kept); + this.output.state.lastAssistantText = ""; + continue; // retry with compacted messages + } + // Forced fallback: estimation may diverge from reality (the LLM + // already told us the context is too large), so drop the oldest + // half of messages even when maybeCompact thinks no compaction is needed. + if (messages.length > MIN_KEEP_MESSAGES) { + const keepCount = Math.max(MIN_KEEP_MESSAGES, Math.floor(messages.length / 2)); + const forcedKept = messages.slice(-keepCount); + this.stderr.write( + `[context-overflow] Forced compaction: ${messages.length} → ${forcedKept.length} messages\n`, + ); + this.agent.replaceMessages(forcedKept); + this.output.state.lastAssistantText = ""; + continue; + } + } + const reason = classifyError(error); if (this.currentProfileId && isRotatableError(reason)) { markAuthProfileFailure(this.currentProfileId, reason); @@ -615,35 +665,88 @@ export class Agent { // Skip compaction during internal runs — internal messages will be // rolled back from memory afterwards, so compacting now would be incorrect. if (message.role === "assistant" && !this._internalRun) { - void this.maybeCompact(); + this._compactionPromise = this.maybeCompact().catch((err) => { + console.error("[Agent] Compaction failed:", err); + }); } } } + /** + * Pre-flight context compaction — runs inside transformContext before every LLM call. + * Pure in-memory, no disk writes. Prunes tool results and drops oldest messages + * when the estimated token utilization exceeds the compaction trigger threshold. + */ + private preflightCompact(messages: AgentMessage[]): AgentMessage[] { + const estimation = estimateTokenUsage({ + messages, + systemPrompt: this.agent.state.systemPrompt, + contextWindowTokens: this.contextWindowGuard.tokens, + reserveTokens: this.reserveTokens, + }); + + if (estimation.utilizationRatio < COMPACTION_TRIGGER_RATIO) { + return messages; // fast path + } + + const originalCount = messages.length; + let result = messages; + + // Phase 1: Prune tool results (soft trim + hard clear) + const pruneResult = pruneToolResults({ + messages: result, + contextWindowTokens: this.contextWindowGuard.tokens, + }); + if (pruneResult.changed) { + result = pruneResult.messages; + } + + // Re-estimate after pruning + const afterPrune = estimateTokenUsage({ + messages: result, + systemPrompt: this.agent.state.systemPrompt, + contextWindowTokens: this.contextWindowGuard.tokens, + reserveTokens: this.reserveTokens, + }); + + // Phase 2: Drop oldest messages if still over threshold + if (afterPrune.utilizationRatio >= COMPACTION_TRIGGER_RATIO) { + const compacted = compactMessagesTokenAware(result, afterPrune.availableTokens); + if (compacted) { + result = compacted.kept; + } + } + + if (result.length < originalCount) { + const saved = originalCount - result.length; + this.stderr.write( + `[pre-flight compaction] pruned ${saved} messages (${originalCount} → ${result.length})\n`, + ); + } + + return result; + } + private async maybeCompact() { const messages = this.agent.state.messages.slice(); if (!this.session.needsCompaction(messages)) return; - try { - const result = await this.session.maybeCompact(messages); - if (!result) return; + const result = await this.session.maybeCompact(messages); + if (!result) return; - this.emitMulticaEvent({ type: "compaction_start" }); - if (result?.kept) { - this.agent.replaceMessages(result.kept); - } - const endEvent: CompactionEndEvent = { - type: "compaction_end", - removed: result?.removedCount ?? 0, - kept: result?.kept.length ?? messages.length, - tokensRemoved: result?.tokensRemoved, - tokensKept: result?.tokensKept, - reason: result?.reason ?? "tokens", - }; - this.emitMulticaEvent(endEvent); - } catch (err) { - throw err; + this.emitMulticaEvent({ type: "compaction_start" }); + if (result.kept) { + this.agent.replaceMessages(result.kept); } + const endEvent: CompactionEndEvent = { + type: "compaction_end", + removed: result.removedCount ?? 0, + kept: result.kept.length ?? messages.length, + tokensRemoved: result.tokensRemoved, + tokensKept: result.tokensKept, + reason: result.reason ?? "tokens", + }; + this.emitMulticaEvent(endEvent); } /** diff --git a/packages/core/src/agent/session/compaction.test.ts b/packages/core/src/agent/session/compaction.test.ts index 124b649f..3529b59d 100644 --- a/packages/core/src/agent/session/compaction.test.ts +++ b/packages/core/src/agent/session/compaction.test.ts @@ -44,7 +44,7 @@ vi.mock("../context-window/index.js", async () => { const systemPromptTokens = params.systemPrompt ? 100 : 0; const reserve = params.reserveTokens ?? 1024; const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve); - const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.2) / availableTokens : 1; + const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1; return { messageTokens, @@ -234,7 +234,7 @@ describe("compaction", () => { // 100 * 10 = 1000 message tokens // System: 100 tokens, Reserve: 1024 // Available: 2000 - 100 - 1024 = 876 - // Utilization: (1000 * 1.2) / 876 = 1.37 > 0.8 + // Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8 const result = compactMessages(messages, { mode: "tokens", contextWindowTokens: 2000, @@ -249,7 +249,7 @@ describe("compaction", () => { const messages = createMessages(5); // 5 * 10 = 50 message tokens // Available: 10000 - 100 - 1024 = 8876 - // Utilization: (50 * 1.2) / 8876 = 0.007 < 0.8 + // Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8 const result = compactMessages(messages, { mode: "tokens", contextWindowTokens: 10000, diff --git a/packages/hooks/src/use-chat.ts b/packages/hooks/src/use-chat.ts index 5711cde5..208f06b2 100644 --- a/packages/hooks/src/use-chat.ts +++ b/packages/hooks/src/use-chat.ts @@ -9,13 +9,22 @@ import { type AgentMessageItem, type ExecApprovalRequestPayload, type ApprovalDecision, + type CompactionEndEvent, } from "@multica/sdk"; export type ToolStatus = "running" | "success" | "error" | "interrupted"; +export interface CompactionInfo { + removed: number; + kept: number; + tokensRemoved?: number; + tokensKept?: number; + reason: string; +} + export interface Message { id: string; - role: "user" | "assistant" | "toolResult"; + role: "user" | "assistant" | "toolResult" | "system"; content: ContentBlock[]; agentId: string; stopReason?: string; @@ -24,6 +33,8 @@ export interface Message { toolArgs?: Record; toolStatus?: ToolStatus; isError?: boolean; + systemType?: "compaction"; + compaction?: CompactionInfo; } export interface ChatError { @@ -215,6 +226,27 @@ export function useChat() { } case "tool_execution_update": break; + case "compaction_end": { + const ce = event as CompactionEndEvent; + setMessages((prev) => [ + ...prev, + { + id: uuidv7(), + role: "system", + content: [], + agentId: payload.agentId, + systemType: "compaction", + compaction: { + removed: ce.removed, + kept: ce.kept, + tokensRemoved: ce.tokensRemoved, + tokensKept: ce.tokensKept, + reason: ce.reason, + }, + }, + ]); + break; + } } }, []); diff --git a/packages/store/src/types.ts b/packages/store/src/types.ts index 40654954..d0d48340 100644 --- a/packages/store/src/types.ts +++ b/packages/store/src/types.ts @@ -2,9 +2,17 @@ import type { ContentBlock } from "@multica/sdk" export type ToolStatus = "running" | "success" | "error" | "interrupted" +export interface CompactionInfo { + removed: number + kept: number + tokensRemoved?: number + tokensKept?: number + reason: string +} + export interface Message { id: string - role: "user" | "assistant" | "toolResult" + role: "user" | "assistant" | "toolResult" | "system" content: ContentBlock[] agentId: string stopReason?: string @@ -13,4 +21,6 @@ export interface Message { toolArgs?: Record toolStatus?: ToolStatus isError?: boolean + systemType?: "compaction" + compaction?: CompactionInfo } diff --git a/packages/ui/src/components/compaction-item.tsx b/packages/ui/src/components/compaction-item.tsx new file mode 100644 index 00000000..1425f94c --- /dev/null +++ b/packages/ui/src/components/compaction-item.tsx @@ -0,0 +1,45 @@ +"use client" + +import { memo } from "react" +import { Scissors } from "lucide-react" +import type { Message } from "@multica/store" + +function formatTokens(n: number): string { + if (n >= 1000) return `~${(n / 1000).toFixed(1)}k` + return `${n}` +} + +interface CompactionItemProps { + message: Message +} + +export const CompactionItem = memo(function CompactionItem({ message }: CompactionItemProps) { + const info = message.compaction + if (!info) return null + + const label = info.reason === "summary" ? "Context summarized" : "Context compacted" + const removed = `${info.removed} messages removed` + const tokens = info.tokensRemoved != null + ? `, ${formatTokens(info.tokensRemoved)} tokens freed` + : "" + + return ( +
+
+ {/* Status dot */} + + + {/* Icon */} + + + {/* Label */} + {label} + + {/* Stats */} + + {removed}{tokens} + +
+
+ ) +}) diff --git a/packages/ui/src/components/message-list.tsx b/packages/ui/src/components/message-list.tsx index 3a754bc3..b2ea2fa2 100644 --- a/packages/ui/src/components/message-list.tsx +++ b/packages/ui/src/components/message-list.tsx @@ -5,6 +5,7 @@ import { MemoizedMarkdown } from "@multica/ui/components/markdown"; import { StreamingMarkdown } from "@multica/ui/components/markdown/StreamingMarkdown"; import { ToolCallItem } from "@multica/ui/components/tool-call-item"; import { ThinkingItem } from "@multica/ui/components/thinking-item"; +import { CompactionItem } from "@multica/ui/components/compaction-item"; import { cn, getTextContent } from "@multica/ui/lib/utils"; import type { Message } from "@multica/store"; import type { ContentBlock, ToolCall, ThinkingContent } from "@multica/sdk"; @@ -78,6 +79,11 @@ export const MessageList = memo(function MessageList({ messages, streamingIds }: return (
{messages.map((msg) => { + // System messages (e.g. compaction notifications) + if (msg.role === "system") { + return + } + // ToolResult messages → render as tool execution item if (msg.role === "toolResult") { return