Merge pull request #136 from multica-ai/fix/agent-compaction

fix(agent): prevent context window overflow with 3-layer compaction defense
2026-02-12 17:23:12 +08:00 · 2026-02-12 17:23:12 +08:00 · 8bc36a9cc9
commit 8bc36a9cc9
parent b0f3e1e38a 8f4e894370
9 changed files with 263 additions and 46 deletions
--- a/packages/core/src/agent/context-window/token-estimation.test.ts
+++ b/packages/core/src/agent/context-window/token-estimation.test.ts
@ -37,7 +37,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => ({
 describe("token-estimation", () => {
  describe("constants", () => {
    it("should have correct safety margin", () => {
-      expect(ESTIMATION_SAFETY_MARGIN).toBe(1.2);
+      expect(ESTIMATION_SAFETY_MARGIN).toBe(1.5);
    });

    it("should have correct compaction trigger ratio", () => {
@ -63,20 +63,20 @@ describe("token-estimation", () => {
    });

    it("should estimate tokens based on character count", () => {
-      // ~3 chars per token
-      expect(estimateSystemPromptTokens("abc")).toBe(1);
-      expect(estimateSystemPromptTokens("abcdef")).toBe(2);
-      expect(estimateSystemPromptTokens("abcdefghi")).toBe(3);
+      // ~2 chars per token (conservative for CJK/mixed content)
+      expect(estimateSystemPromptTokens("ab")).toBe(1);
+      expect(estimateSystemPromptTokens("abcd")).toBe(2);
+      expect(estimateSystemPromptTokens("abcdef")).toBe(3);
    });

    it("should ceil the result", () => {
-      // 4 chars / 3 = 1.33, should ceil to 2
-      expect(estimateSystemPromptTokens("abcd")).toBe(2);
+      // 3 chars / 2 = 1.5, should ceil to 2
+      expect(estimateSystemPromptTokens("abc")).toBe(2);
    });

    it("should handle long prompts", () => {
      const longPrompt = "a".repeat(3000);
-      expect(estimateSystemPromptTokens(longPrompt)).toBe(1000);
+      expect(estimateSystemPromptTokens(longPrompt)).toBe(1500);
    });
  });

@ -140,7 +140,7 @@ describe("token-estimation", () => {
        reserveTokens: 0,
      });

-      // Utilization = (tokens * 1.2) / available
+      // Utilization = (tokens * 1.5) / available
      expect(result.utilizationRatio).toBeGreaterThan(0);
    });
  });
@ -292,26 +292,26 @@ describe("token-estimation", () => {
        content: "x".repeat(400), // ~100 tokens
      } as AgentMessage;

-      // With safety margin 1.2, 100 * 1.2 = 120 tokens
-      // 120 > 1000 * 0.1 = 100, so oversized
+      // With safety margin 1.5, 100 * 1.5 = 150 tokens
+      // 150 > 1000 * 0.1 = 100, so oversized
      expect(isMessageOversized(message, 1000, 0.1)).toBe(true);

-      // 120 < 1000 * 0.2 = 200, so not oversized
+      // 150 < 1000 * 0.2 = 200, so not oversized
      expect(isMessageOversized(message, 1000, 0.2)).toBe(false);
    });

    it("should apply safety margin to token count", () => {
      const message = {
        role: "user",
-        content: "x".repeat(400), // ~100 tokens, with margin ~120
+        content: "x".repeat(400), // ~100 tokens, with margin ~150
      } as AgentMessage;

      // Without margin: 100 < 250 (50% of 500)
-      // With margin: 120 < 250, still ok
+      // With margin: 150 < 250, still ok
      expect(isMessageOversized(message, 500, 0.5)).toBe(false);

      // Without margin: 100 > 100 is false, so the message would not count as oversized
-      // With margin: 120 > 100, should be true
+      // With margin: 150 > 100, should be true
      expect(isMessageOversized(message, 200, 0.5)).toBe(true);
    });
  });
--- a/packages/core/src/agent/context-window/token-estimation.ts
+++ b/packages/core/src/agent/context-window/token-estimation.ts
@ -9,7 +9,7 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent";
 import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js";

 /** Safety margin coefficient to compensate for estimation inaccuracy */
-export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer
+export const ESTIMATION_SAFETY_MARGIN = 1.5; // 50% buffer (covers CJK and mixed content)

 /** Utilization threshold for triggering compaction */
 export const COMPACTION_TRIGGER_RATIO = 0.8; // 80%
@ -32,10 +32,10 @@ export function estimateMessagesTokens(messages: AgentMessage[]): number {
 */
 export function estimateSystemPromptTokens(systemPrompt: string | undefined): number {
  if (!systemPrompt) return 0;
-  // Simple estimation: ~4 chars = 1 token (for English/code mixed text)
-  // Chinese ~2 chars = 1 token
-  // Average value of 3
-  return Math.ceil(systemPrompt.length / 3);
+  // Conservative estimation: ~2 chars = 1 token
+  // English/code averages ~4 chars/token but CJK averages ~1-2 chars/token.
+  // Using /2 as a safe default to prevent underestimation on mixed content.
+  return Math.ceil(systemPrompt.length / 2);
 }

 /**
--- a/packages/core/src/agent/errors.ts
+++ b/packages/core/src/agent/errors.ts
@ -0,0 +1,21 @@
+/**
+ * Error classification utilities for agent error handling.
+ */
+
+/**
+ * Check if an error is a context overflow / "prompt too long" error from any LLM provider.
+ *
+ * These errors indicate the request exceeded the model's context window and should
+ * trigger auto-compaction rather than auth profile rotation.
+ *
+ * Detection is purely textual: the error message (or `String(error)` for
+ * non-Error values) is lowercased and searched for a fixed list of substrings,
+ * so matching is case-insensitive.
+ *
+ * @param error - Any thrown value; it does not need to be an `Error` instance.
+ * @returns `true` if the message contains one of the known overflow phrases.
+ */
+export function isContextOverflowError(error: unknown): boolean {
+  const msg = (error instanceof Error ? error.message : String(error)).toLowerCase(); // lowercase once so the literals below can stay lowercase
+  return (
+    msg.includes("prompt is too long") ||
+    msg.includes("context length exceeded") ||
+    msg.includes("maximum context length") ||
+    msg.includes("request_too_large") ||
+    msg.includes("request size exceeds") ||
+    (msg.includes("413") && msg.includes("too large")) // "413" alone is too generic, so it must co-occur with "too large"; presumably the HTTP 413 status — TODO confirm
+  );
+}
--- a/packages/core/src/agent/runner.ts
+++ b/packages/core/src/agent/runner.ts
@ -22,7 +22,14 @@ import {
  checkContextWindow,
  DEFAULT_CONTEXT_TOKENS,
  type ContextWindowGuardResult,
+  estimateTokenUsage,
+  COMPACTION_TRIGGER_RATIO,
+  compactMessagesTokenAware,
+  MIN_KEEP_MESSAGES,
 } from "./context-window/index.js";
+import {
+  pruneToolResults,
+} from "./context-window/tool-result-pruning.js";
 import { mergeToolsConfig, type ToolsConfig } from "./tools/policy.js";
 import {
  loadAuthProfileStore,
@ -42,6 +49,7 @@ import {
  sanitizeToolCallInputs,
  sanitizeToolUseResultPairing,
 } from "./session/session-transcript-repair.js";
+import { isContextOverflowError } from "./errors.js";

 // ============================================================
 // Error classification for auth profile rotation
@ -89,11 +97,15 @@ export class Agent {
  private readonly stderr: NodeJS.WritableStream;
  private initialized = false;

+  // Context window settings (for pre-flight compaction)
+  private readonly reserveTokens: number;
+
  // Internal run state
  private _internalRun = false;
  private _isRunning = false;
  private _aborted = false;
  private _runMutex: Promise<void> = Promise.resolve();
+  private _compactionPromise: Promise<void> = Promise.resolve();
  private currentUserDisplayPrompt: string | undefined;

  // MulticaEvent subscribers (parallel to PiAgentCore's subscriber list)
@ -188,8 +200,10 @@ export class Agent {
        return this.currentApiKey;
      },
      transformContext: async (messages) => {
-        const sanitizedInputs = sanitizeToolCallInputs(messages);
-        return sanitizeToolUseResultPairing(sanitizedInputs);
+        let result = sanitizeToolCallInputs(messages);
+        result = sanitizeToolUseResultPairing(result);
+        result = this.preflightCompact(result);
+        return result;
      },
    });

@ -260,6 +274,9 @@ export class Agent {
      ? resolveApiKey(this.resolvedProvider, options.apiKey)
      : undefined;

+    // Store reserveTokens for pre-flight compaction
+    this.reserveTokens = options.reserveTokens ?? 1024;
+
    // Create SessionManager (with context window configuration)
    this.session = new SessionManager({
      sessionId: this.sessionId,
@ -425,6 +442,8 @@ export class Agent {
    prompt: string,
    options?: { displayPrompt?: string },
  ): Promise<AgentRunResult> {
+    // Wait for any in-flight compaction from the previous run
+    await this._compactionPromise;
    await this.ensureInitialized();
    this.refreshAuthState();
    this.output.state.lastAssistantText = "";
@ -444,6 +463,9 @@ export class Agent {
      const canRotate = !this.pinnedProfile && this.profileCandidates.length > 1;
      let lastError: unknown;

+      const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 2;
+      let overflowAttempts = 0;
+
      // Loop to exhaust all candidate profiles on rotatable errors
      while (true) {
        try {
@ -452,6 +474,34 @@ export class Agent {
        } catch (error) {
          lastError = error;

+          // Context overflow recovery: auto-compact and retry before trying auth rotation
+          if (isContextOverflowError(error) && overflowAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
+            overflowAttempts++;
+            this.stderr.write(
+              `[context-overflow] Overflow detected (attempt ${overflowAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}), compacting...\n`,
+            );
+            const messages = this.agent.state.messages.slice();
+            const result = await this.session.maybeCompact(messages);
+            if (result?.kept) {
+              this.agent.replaceMessages(result.kept);
+              this.output.state.lastAssistantText = "";
+              continue; // retry with compacted messages
+            }
+            // Forced fallback: estimation may diverge from reality (the LLM
+            // already told us the context is too large), so drop the oldest
+            // half of messages even when maybeCompact thinks no compaction is needed.
+            if (messages.length > MIN_KEEP_MESSAGES) {
+              const keepCount = Math.max(MIN_KEEP_MESSAGES, Math.floor(messages.length / 2));
+              const forcedKept = messages.slice(-keepCount);
+              this.stderr.write(
+                `[context-overflow] Forced compaction: ${messages.length} → ${forcedKept.length} messages\n`,
+              );
+              this.agent.replaceMessages(forcedKept);
+              this.output.state.lastAssistantText = "";
+              continue;
+            }
+          }
+
          const reason = classifyError(error);
          if (this.currentProfileId && isRotatableError(reason)) {
            markAuthProfileFailure(this.currentProfileId, reason);
@ -615,35 +665,88 @@ export class Agent {
      // Skip compaction during internal runs — internal messages will be
      // rolled back from memory afterwards, so compacting now would be incorrect.
      if (message.role === "assistant" && !this._internalRun) {
-        void this.maybeCompact();
+        this._compactionPromise = this.maybeCompact().catch((err) => {
+          console.error("[Agent] Compaction failed:", err);
+        });
      }
    }
  }

+  /**
+   * Pre-flight context compaction — runs inside transformContext before every LLM call.
+   * Pure in-memory, no disk writes. Prunes tool results and drops oldest messages
+   * when the estimated token utilization exceeds the compaction trigger threshold.
+   *
+   * Steps:
+   *  1. Estimate utilization; below COMPACTION_TRIGGER_RATIO the input array is
+   *     returned unchanged.
+   *  2. Run pruneToolResults and adopt its output only if it reports a change.
+   *  3. Re-estimate; if utilization is still at or above the threshold, run
+   *     compactMessagesTokenAware against the available tokens from that
+   *     estimate and adopt its `kept` list when it returns a result.
+   *
+   * This method never calls replaceMessages, so the agent's stored messages are
+   * not replaced here; only the returned array can differ.
+   * NOTE(review): whether the helpers mutate the input messages is not visible
+   * here — confirm before relying on the input being untouched.
+   *
+   * @param messages - Messages handed over by transformContext.
+   * @returns The same array on the fast path; otherwise the pruned and/or
+   *   compacted list (still the input if neither step changed anything).
+   */
+  private preflightCompact(messages: AgentMessage[]): AgentMessage[] {
+    const estimation = estimateTokenUsage({
+      messages,
+      systemPrompt: this.agent.state.systemPrompt,
+      contextWindowTokens: this.contextWindowGuard.tokens,
+      reserveTokens: this.reserveTokens,
+    });
+
+    if (estimation.utilizationRatio < COMPACTION_TRIGGER_RATIO) {
+      return messages; // fast path
+    }
+
+    const originalCount = messages.length;
+    let result = messages;
+
+    // Phase 1: Prune tool results (soft trim + hard clear)
+    const pruneResult = pruneToolResults({
+      messages: result,
+      contextWindowTokens: this.contextWindowGuard.tokens,
+    });
+    if (pruneResult.changed) {
+      result = pruneResult.messages;
+    }
+
+    // Re-estimate after pruning
+    const afterPrune = estimateTokenUsage({
+      messages: result,
+      systemPrompt: this.agent.state.systemPrompt,
+      contextWindowTokens: this.contextWindowGuard.tokens,
+      reserveTokens: this.reserveTokens,
+    });
+
+    // Phase 2: Drop oldest messages if still over threshold
+    if (afterPrune.utilizationRatio >= COMPACTION_TRIGGER_RATIO) {
+      const compacted = compactMessagesTokenAware(result, afterPrune.availableTokens); // budget comes from the post-prune estimate
+      if (compacted) { // a falsy result means nothing to adopt; keep the pruned list
+        result = compacted.kept;
+      }
+    }
+
+    if (result.length < originalCount) { // NOTE(review): only logs when messages were dropped; pruning that keeps the same message count is silent
+      const saved = originalCount - result.length;
+      this.stderr.write(
+        `[pre-flight compaction] pruned ${saved} messages (${originalCount} → ${result.length})\n`,
+      );
+    }
+
+    return result;
+  }
+
  private async maybeCompact() { // asks the session to compact and mirrors the outcome into the agent's messages and events
    const messages = this.agent.state.messages.slice(); // work on a snapshot copy
    if (!this.session.needsCompaction(messages)) return;

-    try {
-      const result = await this.session.maybeCompact(messages);
-      if (!result) return;
+    const result = await this.session.maybeCompact(messages); // errors now propagate to the caller (the removed try/catch only rethrew)
+    if (!result) return; // session produced no compaction result

-      this.emitMulticaEvent({ type: "compaction_start" });
-      if (result?.kept) {
-        this.agent.replaceMessages(result.kept);
-      }
-      const endEvent: CompactionEndEvent = {
-        type: "compaction_end",
-        removed: result?.removedCount ?? 0,
-        kept: result?.kept.length ?? messages.length,
-        tokensRemoved: result?.tokensRemoved,
-        tokensKept: result?.tokensKept,
-        reason: result?.reason ?? "tokens",
-      };
-      this.emitMulticaEvent(endEvent);
-    } catch (err) {
-      throw err;
+    this.emitMulticaEvent({ type: "compaction_start" }); // NOTE(review): emitted after session.maybeCompact has already resolved, so "start" and "end" fire back-to-back
+    if (result.kept) {
+      this.agent.replaceMessages(result.kept);
    }
+    const endEvent: CompactionEndEvent = {
+      type: "compaction_end",
+      removed: result.removedCount ?? 0,
+      kept: result.kept.length ?? messages.length, // NOTE(review): `.length` is never nullish, so the `?? messages.length` fallback is unreachable; if `kept` can be undefined (as the guard above suggests) this line throws
+      tokensRemoved: result.tokensRemoved,
+      tokensKept: result.tokensKept,
+      reason: result.reason ?? "tokens",
+    };
+    this.emitMulticaEvent(endEvent);
  }

  /**
--- a/packages/core/src/agent/session/compaction.test.ts
+++ b/packages/core/src/agent/session/compaction.test.ts
@ -44,7 +44,7 @@ vi.mock("../context-window/index.js", async () => {
      const systemPromptTokens = params.systemPrompt ? 100 : 0;
      const reserve = params.reserveTokens ?? 1024;
      const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve);
-      const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.2) / availableTokens : 1;
+      const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1;

      return {
        messageTokens,
@ -234,7 +234,7 @@ describe("compaction", () => {
        // 100 * 10 = 1000 message tokens
        // System: 100 tokens, Reserve: 1024
        // Available: 2000 - 100 - 1024 = 876
-        // Utilization: (1000 * 1.2) / 876 = 1.37 > 0.8
+        // Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8
        const result = compactMessages(messages, {
          mode: "tokens",
          contextWindowTokens: 2000,
@ -249,7 +249,7 @@ describe("compaction", () => {
        const messages = createMessages(5);
        // 5 * 10 = 50 message tokens
        // Available: 10000 - 100 - 1024 = 8876
-        // Utilization: (50 * 1.2) / 8876 = 0.007 < 0.8
+        // Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8
        const result = compactMessages(messages, {
          mode: "tokens",
          contextWindowTokens: 10000,
--- a/packages/hooks/src/use-chat.ts
+++ b/packages/hooks/src/use-chat.ts
@ -9,13 +9,22 @@ import {
  type AgentMessageItem,
  type ExecApprovalRequestPayload,
  type ApprovalDecision,
+  type CompactionEndEvent,
 } from "@multica/sdk";

 export type ToolStatus = "running" | "success" | "error" | "interrupted";

+export interface CompactionInfo { // compaction stats copied from a "compaction_end" event; NOTE(review): an identical interface exists in packages/store/src/types.ts — keep both in sync
+  removed: number; // count of messages removed by the compaction
+  kept: number; // count of messages kept after the compaction
+  tokensRemoved?: number; // optional; copied from the event's tokensRemoved
+  tokensKept?: number; // optional; copied from the event's tokensKept
+  reason: string; // copied from the event's reason field
+}
+
 export interface Message {
  id: string;
-  role: "user" | "assistant" | "toolResult";
+  role: "user" | "assistant" | "toolResult" | "system";
  content: ContentBlock[];
  agentId: string;
  stopReason?: string;
@ -24,6 +33,8 @@ export interface Message {
  toolArgs?: Record<string, unknown>;
  toolStatus?: ToolStatus;
  isError?: boolean;
+  systemType?: "compaction";
+  compaction?: CompactionInfo;
 }

 export interface ChatError {
@ -215,6 +226,27 @@ export function useChat() {
      }
      case "tool_execution_update":
        break;
+      case "compaction_end": {
+        const ce = event as CompactionEndEvent;
+        setMessages((prev) => [
+          ...prev,
+          {
+            id: uuidv7(),
+            role: "system",
+            content: [],
+            agentId: payload.agentId,
+            systemType: "compaction",
+            compaction: {
+              removed: ce.removed,
+              kept: ce.kept,
+              tokensRemoved: ce.tokensRemoved,
+              tokensKept: ce.tokensKept,
+              reason: ce.reason,
+            },
+          },
+        ]);
+        break;
+      }
    }
  }, []);

--- a/packages/store/src/types.ts
+++ b/packages/store/src/types.ts
@ -2,9 +2,17 @@ import type { ContentBlock } from "@multica/sdk"

 export type ToolStatus = "running" | "success" | "error" | "interrupted"

+export interface CompactionInfo { // compaction stats attached to a Message; NOTE(review): an identical interface exists in packages/hooks/src/use-chat.ts — keep both in sync
+  removed: number // count of messages removed by the compaction
+  kept: number // count of messages kept after the compaction
+  tokensRemoved?: number // optional; rendered as "tokens freed" by CompactionItem when present
+  tokensKept?: number // optional token count for the kept messages
+  reason: string // CompactionItem labels "summary" as "Context summarized", anything else as "Context compacted"
+}
+
 export interface Message {
  id: string
-  role: "user" | "assistant" | "toolResult"
+  role: "user" | "assistant" | "toolResult" | "system"
  content: ContentBlock[]
  agentId: string
  stopReason?: string
@ -13,4 +21,6 @@ export interface Message {
  toolArgs?: Record<string, unknown>
  toolStatus?: ToolStatus
  isError?: boolean
+  systemType?: "compaction"
+  compaction?: CompactionInfo
 }
--- a/packages/ui/src/components/compaction-item.tsx
+++ b/packages/ui/src/components/compaction-item.tsx
@ -0,0 +1,45 @@
+"use client"
+
+import { memo } from "react"
+import { Scissors } from "lucide-react"
+import type { Message } from "@multica/store"
+
+function formatTokens(n: number): string { // short display form of a token count
+  if (n >= 1000) return `~${(n / 1000).toFixed(1)}k` // thousands with one decimal, e.g. 1500 -> "~1.5k"
+  return `${n}` // below 1000: the number as-is, without the "~" prefix
+}
+
+interface CompactionItemProps { // props for CompactionItem
+  message: Message // the component only reads `message.compaction`
+}
+
+export const CompactionItem = memo(function CompactionItem({ message }: CompactionItemProps) { // one-line notice summarizing a context compaction
+  const info = message.compaction
+  if (!info) return null // render nothing when the message carries no compaction data
+
+  const label = info.reason === "summary" ? "Context summarized" : "Context compacted" // any reason other than "summary" gets the generic label
+  const removed = `${info.removed} messages removed` // NOTE(review): not pluralized — reads "1 messages removed" when removed is 1
+  const tokens = info.tokensRemoved != null // loose != null skips both null and undefined
+    ? `, ${formatTokens(info.tokensRemoved)} tokens freed`
+    : ""
+
+  return (
+    <div className="py-0.5 px-2.5 text-sm text-muted-foreground">
+      <div className="flex items-center gap-1.5 px-2.5 py-1">
+        {/* Status dot */}
+        <span className="size-1.5 rounded-full shrink-0 bg-muted-foreground/40" />
+
+        {/* Icon */}
+        <Scissors className="size-3.5 shrink-0" />
+
+        {/* Label */}
+        <span className="font-medium shrink-0">{label}</span>
+
+        {/* Stats */}
+        <span className="ml-auto text-xs text-muted-foreground/60 shrink-0">
+          {removed}{tokens}
+        </span>
+      </div>
+    </div>
+  )
+})
--- a/packages/ui/src/components/message-list.tsx
+++ b/packages/ui/src/components/message-list.tsx
@ -5,6 +5,7 @@ import { MemoizedMarkdown } from "@multica/ui/components/markdown";
 import { StreamingMarkdown } from "@multica/ui/components/markdown/StreamingMarkdown";
 import { ToolCallItem } from "@multica/ui/components/tool-call-item";
 import { ThinkingItem } from "@multica/ui/components/thinking-item";
+import { CompactionItem } from "@multica/ui/components/compaction-item";
 import { cn, getTextContent } from "@multica/ui/lib/utils";
 import type { Message } from "@multica/store";
 import type { ContentBlock, ToolCall, ThinkingContent } from "@multica/sdk";
@ -78,6 +79,11 @@ export const MessageList = memo(function MessageList({ messages, streamingIds }:
  return (
    <div className="relative p-6 px-4 sm:px-10 max-w-4xl mx-auto">
      {messages.map((msg) => {
+        // System messages (e.g. compaction notifications)
+        if (msg.role === "system") {
+          return <CompactionItem key={msg.id} message={msg} />
+        }
+
        // ToolResult messages → render as tool execution item
        if (msg.role === "toolResult") {
          return <ToolCallItem key={msg.id} message={msg} />