diff --git a/packages/core/src/agent/context-window/token-estimation.test.ts b/packages/core/src/agent/context-window/token-estimation.test.ts
index cef9c54c..749097ef 100644
--- a/packages/core/src/agent/context-window/token-estimation.test.ts
+++ b/packages/core/src/agent/context-window/token-estimation.test.ts
@@ -37,7 +37,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => ({
 describe("token-estimation", () => {
   describe("constants", () => {
     it("should have correct safety margin", () => {
-      expect(ESTIMATION_SAFETY_MARGIN).toBe(1.2);
+      expect(ESTIMATION_SAFETY_MARGIN).toBe(1.5);
     });
 
     it("should have correct compaction trigger ratio", () => {
@@ -63,20 +63,20 @@ describe("token-estimation", () => {
     });
 
     it("should estimate tokens based on character count", () => {
-      // ~3 chars per token
-      expect(estimateSystemPromptTokens("abc")).toBe(1);
-      expect(estimateSystemPromptTokens("abcdef")).toBe(2);
-      expect(estimateSystemPromptTokens("abcdefghi")).toBe(3);
+      // ~2 chars per token (conservative for CJK/mixed content)
+      expect(estimateSystemPromptTokens("ab")).toBe(1);
+      expect(estimateSystemPromptTokens("abcd")).toBe(2);
+      expect(estimateSystemPromptTokens("abcdef")).toBe(3);
     });
 
     it("should ceil the result", () => {
-      // 4 chars / 3 = 1.33, should ceil to 2
-      expect(estimateSystemPromptTokens("abcd")).toBe(2);
+      // 3 chars / 2 = 1.5, should ceil to 2
+      expect(estimateSystemPromptTokens("abc")).toBe(2);
     });
 
     it("should handle long prompts", () => {
       const longPrompt = "a".repeat(3000);
-      expect(estimateSystemPromptTokens(longPrompt)).toBe(1000);
+      expect(estimateSystemPromptTokens(longPrompt)).toBe(1500);
     });
   });
 
@@ -140,7 +140,7 @@ describe("token-estimation", () => {
         reserveTokens: 0,
       });
 
-      // Utilization = (tokens * 1.2) / available
+      // Utilization = (tokens * 1.5) / available
       expect(result.utilizationRatio).toBeGreaterThan(0);
     });
   });
@@ -292,26 +292,26 @@ describe("token-estimation", () => {
         content: "x".repeat(400), // ~100 tokens
       } as AgentMessage;
 
-      // With safety margin 1.2, 100 * 1.2 = 120 tokens
-      // 120 > 1000 * 0.1 = 100, so oversized
+      // With safety margin 1.5, 100 * 1.5 = 150 tokens
+      // 150 > 1000 * 0.1 = 100, so oversized
       expect(isMessageOversized(message, 1000, 0.1)).toBe(true);
 
-      // 120 < 1000 * 0.2 = 200, so not oversized
+      // 150 < 1000 * 0.2 = 200, so not oversized
       expect(isMessageOversized(message, 1000, 0.2)).toBe(false);
     });
 
     it("should apply safety margin to token count", () => {
       const message = {
         role: "user",
-        content: "x".repeat(400), // ~100 tokens, with margin ~120
+        content: "x".repeat(400), // ~100 tokens, with margin ~150
       } as AgentMessage;
 
       // Without margin: 100 < 250 (50% of 500)
-      // With margin: 120 < 250, still ok
+      // With margin: 150 < 250, still ok
       expect(isMessageOversized(message, 500, 0.5)).toBe(false);
 
       // Without margin: 100 < 100 would be false
-      // With margin: 120 > 100, should be true
+      // With margin: 150 > 100, should be true
       expect(isMessageOversized(message, 200, 0.5)).toBe(true);
     });
   });
diff --git a/packages/core/src/agent/context-window/token-estimation.ts b/packages/core/src/agent/context-window/token-estimation.ts
index 7899b050..26524f67 100644
--- a/packages/core/src/agent/context-window/token-estimation.ts
+++ b/packages/core/src/agent/context-window/token-estimation.ts
@@ -9,7 +9,7 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent";
 import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js";
 
 /** Safety margin coefficient to compensate for estimation inaccuracy */
-export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer
+export const ESTIMATION_SAFETY_MARGIN = 1.5; // 50% buffer (covers CJK and mixed content)
 
 /** Utilization threshold for triggering compaction */
 export const COMPACTION_TRIGGER_RATIO = 0.8; // 80%
@@ -32,10 +32,10 @@ export function estimateMessagesTokens(messages: AgentMessage[]): number {
  */
 export function estimateSystemPromptTokens(systemPrompt: string | undefined): number {
   if (!systemPrompt) return 0;
-  // Simple estimation: ~4 chars = 1 token (for English/code mixed text)
-  // Chinese ~2 chars = 1 token
-  // Average value of 3
-  return Math.ceil(systemPrompt.length / 3);
+  // Conservative estimate: 1 token per 2 UTF-16 code units, rounded up.
+  // English/code averages ~4 chars/token but CJK averages ~1-2 chars/token,
+  // so /2 over-counts Latin text and can still under-count dense CJK (~1 char/token).
+  return Math.ceil(systemPrompt.length / 2);
 }
 
 /**
diff --git a/packages/core/src/agent/errors.ts b/packages/core/src/agent/errors.ts
new file mode 100644
index 00000000..7f51bfce
--- /dev/null
+++ b/packages/core/src/agent/errors.ts
@@ -0,0 +1,46 @@
+/**
+ * Error classification utilities for agent error handling.
+ */
+
+/** Lowercase substrings that mark a context overflow / oversized request error. */
+const CONTEXT_OVERFLOW_MARKERS = [
+  "prompt is too long",
+  "context length exceeded",
+  "maximum context length",
+  "request_too_large",
+  "request size exceeds",
+] as const;
+
+/**
+ * Extract the text to classify from an arbitrary thrown value.
+ *
+ * `Error` instances and error-like objects with a string `message` yield that
+ * message; anything else is stringified. Reading `message` off plain objects
+ * avoids classifying "[object Object]" when a non-Error object is thrown.
+ */
+function errorText(error: unknown): string {
+  if (error instanceof Error) return error.message;
+  if (typeof error === "object" && error !== null) {
+    const { message } = error as { message?: unknown };
+    if (typeof message === "string") return message;
+  }
+  return String(error);
+}
+
+/**
+ * Check if an error is a context overflow / "prompt too long" error from any LLM provider.
+ *
+ * These errors indicate the request exceeded the model's context window and should
+ * trigger auto-compaction rather than auth profile rotation.
+ *
+ * @param error - Any thrown value: an `Error`, a string, or an error-like object.
+ * @returns `true` when the lowercased error text contains a known overflow marker,
+ *   or mentions both "413" and "too large".
+ */
+export function isContextOverflowError(error: unknown): boolean {
+  const msg = errorText(error).toLowerCase();
+  return (
+    CONTEXT_OVERFLOW_MARKERS.some((marker) => msg.includes(marker)) ||
+    (msg.includes("413") && msg.includes("too large"))
+  );
+}
diff --git a/packages/core/src/agent/runner.ts b/packages/core/src/agent/runner.ts
index 81b7fc26..105eee43 100644
--- a/packages/core/src/agent/runner.ts
+++ b/packages/core/src/agent/runner.ts
@@ -22,7 +22,14 @@ import {
   checkContextWindow,
   DEFAULT_CONTEXT_TOKENS,
   type ContextWindowGuardResult,
+  estimateTokenUsage,
+  COMPACTION_TRIGGER_RATIO,
+  compactMessagesTokenAware,
+  MIN_KEEP_MESSAGES,
 } from "./context-window/index.js";
+import {
+  pruneToolResults,
+} from "./context-window/tool-result-pruning.js";
 import { mergeToolsConfig, type ToolsConfig } from "./tools/policy.js";
 import {
   loadAuthProfileStore,
@@ -42,6 +49,7 @@ import {
   sanitizeToolCallInputs,
   sanitizeToolUseResultPairing,
 } from "./session/session-transcript-repair.js";
+import { isContextOverflowError } from "./errors.js";
 
 // ============================================================
 // Error classification for auth profile rotation
@@ -89,11 +97,15 @@ export class Agent {
   private readonly stderr: NodeJS.WritableStream;
   private initialized = false;
 
+  // Context window settings (for pre-flight compaction)
+  private readonly reserveTokens: number;
+
   // Internal run state
   private _internalRun = false;
   private _isRunning = false;
   private _aborted = false;
   private _runMutex: Promise<void> = Promise.resolve();
+  private _compactionPromise: Promise<void> = Promise.resolve();
   private currentUserDisplayPrompt: string | undefined;
 
   // MulticaEvent subscribers (parallel to PiAgentCore's subscriber list)
@@ -188,8 +200,10 @@ export class Agent {
         return this.currentApiKey;
       },
       transformContext: async (messages) => {
-        const sanitizedInputs = sanitizeToolCallInputs(messages);
-        return sanitizeToolUseResultPairing(sanitizedInputs);
+        let result = sanitizeToolCallInputs(messages);
+        result = sanitizeToolUseResultPairing(result);
+        result = this.preflightCompact(result);
+        return result;
       },
     });
 
@@ -260,6 +274,9 @@ export class Agent {
       ? resolveApiKey(this.resolvedProvider, options.apiKey)
       : undefined;
 
+    // Store reserveTokens for pre-flight compaction
+    this.reserveTokens = options.reserveTokens ?? 1024;
+
     // 创建 SessionManager（带 context window 配置）
     this.session = new SessionManager({
       sessionId: this.sessionId,
@@ -425,6 +442,8 @@ export class Agent {
     prompt: string,
     options?: { displayPrompt?: string },
   ): Promise<AgentRunResult> {
+    // Wait for any in-flight compaction from the previous run
+    await this._compactionPromise;
     await this.ensureInitialized();
     this.refreshAuthState();
     this.output.state.lastAssistantText = "";
@@ -444,6 +463,9 @@ export class Agent {
       const canRotate = !this.pinnedProfile && this.profileCandidates.length > 1;
       let lastError: unknown;
 
+      const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 2;
+      let overflowAttempts = 0;
+
       // Loop to exhaust all candidate profiles on rotatable errors
       while (true) {
         try {
@@ -452,6 +474,34 @@ export class Agent {
         } catch (error) {
           lastError = error;
 
+          // Context overflow recovery: auto-compact and retry before trying auth rotation
+          if (isContextOverflowError(error) && overflowAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
+            overflowAttempts++;
+            this.stderr.write(
+              `[context-overflow] Overflow detected (attempt ${overflowAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}), compacting...\n`,
+            );
+            const messages = this.agent.state.messages.slice();
+            const result = await this.session.maybeCompact(messages);
+            if (result?.kept) {
+              this.agent.replaceMessages(result.kept);
+              this.output.state.lastAssistantText = "";
+              continue; // retry with compacted messages
+            }
+            // Forced fallback: estimation may diverge from reality (the LLM
+            // already told us the context is too large), so drop the oldest
+            // half of messages even when maybeCompact thinks no compaction is needed.
+            if (messages.length > MIN_KEEP_MESSAGES) {
+              const keepCount = Math.max(MIN_KEEP_MESSAGES, Math.floor(messages.length / 2));
+              const forcedKept = messages.slice(-keepCount);
+              this.stderr.write(
+                `[context-overflow] Forced compaction: ${messages.length} → ${forcedKept.length} messages\n`,
+              );
+              this.agent.replaceMessages(forcedKept);
+              this.output.state.lastAssistantText = "";
+              continue;
+            }
+          }
+
           const reason = classifyError(error);
           if (this.currentProfileId && isRotatableError(reason)) {
             markAuthProfileFailure(this.currentProfileId, reason);
@@ -615,35 +665,88 @@ export class Agent {
       // Skip compaction during internal runs — internal messages will be
       // rolled back from memory afterwards, so compacting now would be incorrect.
       if (message.role === "assistant" && !this._internalRun) {
-        void this.maybeCompact();
+        this._compactionPromise = this.maybeCompact().catch((err) => {
+          console.error("[Agent] Compaction failed:", err);
+        });
       }
     }
   }
 
+  /**
+   * Pre-flight context trimming, invoked from transformContext before an LLM call.
+   *
+   * Leaves `messages` untouched while estimated utilization is below
+   * COMPACTION_TRIGGER_RATIO. Otherwise it prunes tool results and, if the
+   * re-estimated utilization is still at or above the threshold, asks
+   * compactMessagesTokenAware for a shorter list. This method persists nothing
+   * itself; its only side effect is one stderr line when the count shrank.
+   *
+   * @param messages - Context messages about to be sent to the model.
+   * @returns The original array, or a pruned/compacted replacement.
+   */
+  private preflightCompact(messages: AgentMessage[]): AgentMessage[] {
+    const measure = (candidate: AgentMessage[]) =>
+      estimateTokenUsage({
+        messages: candidate,
+        systemPrompt: this.agent.state.systemPrompt,
+        contextWindowTokens: this.contextWindowGuard.tokens,
+        reserveTokens: this.reserveTokens,
+      });
+
+    // Fast path: under the trigger ratio nothing has to change.
+    if (measure(messages).utilizationRatio < COMPACTION_TRIGGER_RATIO) {
+      return messages;
+    }
+
+    const countBefore = messages.length;
+
+    // Phase 1: prune tool results; keep the input when nothing changed.
+    const pruned = pruneToolResults({
+      messages,
+      contextWindowTokens: this.contextWindowGuard.tokens,
+    });
+    let current: AgentMessage[] = pruned.changed ? pruned.messages : messages;
+
+    // Phase 2: re-estimate, then compact if still at or over the threshold.
+    const usage = measure(current);
+    if (usage.utilizationRatio >= COMPACTION_TRIGGER_RATIO) {
+      const compacted = compactMessagesTokenAware(current, usage.availableTokens);
+      if (compacted) {
+        current = compacted.kept;
+      }
+    }
+
+    // Report only when whole messages were removed.
+    const countAfter = current.length;
+    if (countAfter < countBefore) {
+      this.stderr.write(
+        `[pre-flight compaction] pruned ${countBefore - countAfter} messages (${countBefore} → ${countAfter})\n`,
+      );
+    }
+
+    return current;
+  }
+
   private async maybeCompact() {
     const messages = this.agent.state.messages.slice();
     if (!this.session.needsCompaction(messages)) return;
 
-    try {
-      const result = await this.session.maybeCompact(messages);
-      if (!result) return;
+    const result = await this.session.maybeCompact(messages); // rejections propagate to the caller
+    if (!result) return; // no result: leave messages untouched and emit no events
 
-      this.emitMulticaEvent({ type: "compaction_start" });
-      if (result?.kept) {
-        this.agent.replaceMessages(result.kept);
-      }
-      const endEvent: CompactionEndEvent = {
-        type: "compaction_end",
-        removed: result?.removedCount ?? 0,
-        kept: result?.kept.length ?? messages.length,
-        tokensRemoved: result?.tokensRemoved,
-        tokensKept: result?.tokensKept,
-        reason: result?.reason ?? "tokens",
-      };
-      this.emitMulticaEvent(endEvent);
-    } catch (err) {
-      throw err;
+    this.emitMulticaEvent({ type: "compaction_start" }); // emitted once a result exists, just before it is applied
+    if (result.kept) {
+      this.agent.replaceMessages(result.kept);
     }
+    const endEvent: CompactionEndEvent = {
+      type: "compaction_end",
+      removed: result.removedCount ?? 0,
+      kept: result.kept?.length ?? messages.length, // same guard as above; falls back to the snapshot's count
+      tokensRemoved: result.tokensRemoved,
+      tokensKept: result.tokensKept,
+      reason: result.reason ?? "tokens",
+    };
+    this.emitMulticaEvent(endEvent);
   }
 
   /**
diff --git a/packages/core/src/agent/session/compaction.test.ts b/packages/core/src/agent/session/compaction.test.ts
index 124b649f..3529b59d 100644
--- a/packages/core/src/agent/session/compaction.test.ts
+++ b/packages/core/src/agent/session/compaction.test.ts
@@ -44,7 +44,7 @@ vi.mock("../context-window/index.js", async () => {
       const systemPromptTokens = params.systemPrompt ? 100 : 0;
       const reserve = params.reserveTokens ?? 1024;
       const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve);
-      const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.2) / availableTokens : 1;
+      const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1;
 
       return {
         messageTokens,
@@ -234,7 +234,7 @@ describe("compaction", () => {
         // 100 * 10 = 1000 message tokens
         // System: 100 tokens, Reserve: 1024
         // Available: 2000 - 100 - 1024 = 876
-        // Utilization: (1000 * 1.2) / 876 = 1.37 > 0.8
+        // Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8
         const result = compactMessages(messages, {
           mode: "tokens",
           contextWindowTokens: 2000,
@@ -249,7 +249,7 @@ describe("compaction", () => {
         const messages = createMessages(5);
         // 5 * 10 = 50 message tokens
         // Available: 10000 - 100 - 1024 = 8876
-        // Utilization: (50 * 1.2) / 8876 = 0.007 < 0.8
+        // Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8
         const result = compactMessages(messages, {
           mode: "tokens",
           contextWindowTokens: 10000,
diff --git a/packages/hooks/src/use-chat.ts b/packages/hooks/src/use-chat.ts
index 5711cde5..208f06b2 100644
--- a/packages/hooks/src/use-chat.ts
+++ b/packages/hooks/src/use-chat.ts
@@ -9,13 +9,22 @@ import {
   type AgentMessageItem,
   type ExecApprovalRequestPayload,
   type ApprovalDecision,
+  type CompactionEndEvent,
 } from "@multica/sdk";
 
 export type ToolStatus = "running" | "success" | "error" | "interrupted";
 
+export interface CompactionInfo {
+  removed: number;
+  kept: number;
+  tokensRemoved?: number;
+  tokensKept?: number;
+  reason: string;
+}
+
 export interface Message {
   id: string;
-  role: "user" | "assistant" | "toolResult";
+  role: "user" | "assistant" | "toolResult" | "system";
   content: ContentBlock[];
   agentId: string;
   stopReason?: string;
@@ -24,6 +33,8 @@ export interface Message {
   toolArgs?: Record<string, unknown>;
   toolStatus?: ToolStatus;
   isError?: boolean;
+  systemType?: "compaction";
+  compaction?: CompactionInfo;
 }
 
 export interface ChatError {
@@ -215,6 +226,27 @@ export function useChat() {
       }
       case "tool_execution_update":
         break;
+      case "compaction_end": {
+        const ce = event as CompactionEndEvent;
+        setMessages((prev) => [
+          ...prev,
+          {
+            id: uuidv7(),
+            role: "system",
+            content: [],
+            agentId: payload.agentId,
+            systemType: "compaction",
+            compaction: {
+              removed: ce.removed,
+              kept: ce.kept,
+              tokensRemoved: ce.tokensRemoved,
+              tokensKept: ce.tokensKept,
+              reason: ce.reason,
+            },
+          },
+        ]);
+        break;
+      }
     }
   }, []);
 
diff --git a/packages/store/src/types.ts b/packages/store/src/types.ts
index 40654954..d0d48340 100644
--- a/packages/store/src/types.ts
+++ b/packages/store/src/types.ts
@@ -2,9 +2,17 @@ import type { ContentBlock } from "@multica/sdk"
 
 export type ToolStatus = "running" | "success" | "error" | "interrupted"
 
+export interface CompactionInfo {
+  removed: number
+  kept: number
+  tokensRemoved?: number
+  tokensKept?: number
+  reason: string
+}
+
 export interface Message {
   id: string
-  role: "user" | "assistant" | "toolResult"
+  role: "user" | "assistant" | "toolResult" | "system"
   content: ContentBlock[]
   agentId: string
   stopReason?: string
@@ -13,4 +21,6 @@ export interface Message {
   toolArgs?: Record<string, unknown>
   toolStatus?: ToolStatus
   isError?: boolean
+  systemType?: "compaction"
+  compaction?: CompactionInfo
 }
diff --git a/packages/ui/src/components/compaction-item.tsx b/packages/ui/src/components/compaction-item.tsx
new file mode 100644
index 00000000..1425f94c
--- /dev/null
+++ b/packages/ui/src/components/compaction-item.tsx
@@ -0,0 +1,59 @@
+"use client"
+
+import { memo } from "react"
+import { Scissors } from "lucide-react"
+import type { Message } from "@multica/store"
+
+/**
+ * Format a token count for display: values of 1000 or more are abbreviated
+ * as "~X.Yk" (one decimal place); smaller values are printed as-is.
+ */
+function formatTokens(n: number): string {
+  if (n >= 1000) return `~${(n / 1000).toFixed(1)}k`
+  return `${n}`
+}
+
+/** "1 message removed" / "N messages removed" — singular only for exactly one. */
+function formatRemoved(count: number): string {
+  return `${count} ${count === 1 ? "message" : "messages"} removed`
+}
+
+interface CompactionItemProps {
+  message: Message
+}
+
+/**
+ * Single-row notice describing a context compaction.
+ * Renders nothing when the message carries no `compaction` info.
+ */
+export const CompactionItem = memo(function CompactionItem({ message }: CompactionItemProps) {
+  const info = message.compaction
+  if (!info) return null
+
+  const label = info.reason === "summary" ? "Context summarized" : "Context compacted"
+  const removed = formatRemoved(info.removed)
+  // tokensRemoved is optional; omit the suffix entirely when it is absent
+  const tokens = info.tokensRemoved != null
+    ? `, ${formatTokens(info.tokensRemoved)} tokens freed`
+    : ""
+
+  return (
+    <div className="py-0.5 px-2.5 text-sm text-muted-foreground">
+      <div className="flex items-center gap-1.5 px-2.5 py-1">
+        {/* Status dot */}
+        <span className="size-1.5 rounded-full shrink-0 bg-muted-foreground/40" />
+
+        {/* Icon */}
+        <Scissors className="size-3.5 shrink-0" />
+
+        {/* Label */}
+        <span className="font-medium shrink-0">{label}</span>
+
+        {/* Stats */}
+        <span className="ml-auto text-xs text-muted-foreground/60 shrink-0">
+          {removed}{tokens}
+        </span>
+      </div>
+    </div>
+  )
+})
diff --git a/packages/ui/src/components/message-list.tsx b/packages/ui/src/components/message-list.tsx
index 3a754bc3..b2ea2fa2 100644
--- a/packages/ui/src/components/message-list.tsx
+++ b/packages/ui/src/components/message-list.tsx
@@ -5,6 +5,7 @@ import { MemoizedMarkdown } from "@multica/ui/components/markdown";
 import { StreamingMarkdown } from "@multica/ui/components/markdown/StreamingMarkdown";
 import { ToolCallItem } from "@multica/ui/components/tool-call-item";
 import { ThinkingItem } from "@multica/ui/components/thinking-item";
+import { CompactionItem } from "@multica/ui/components/compaction-item";
 import { cn, getTextContent } from "@multica/ui/lib/utils";
 import type { Message } from "@multica/store";
 import type { ContentBlock, ToolCall, ThinkingContent } from "@multica/sdk";
@@ -78,6 +79,11 @@ export const MessageList = memo(function MessageList({ messages, streamingIds }:
   return (
     <div className="relative p-6 px-4 sm:px-10 max-w-4xl mx-auto">
       {messages.map((msg) => {
+        // System messages (e.g. compaction notifications)
+        if (msg.role === "system") {
+          return <CompactionItem key={msg.id} message={msg} />
+        }
+
         // ToolResult messages → render as tool execution item
         if (msg.role === "toolResult") {
           return <ToolCallItem key={msg.id} message={msg} />