Merge pull request #157 from multica-ai/feat/improve-compaction

feat(compaction): improve summary mode with fallback, split-turn, and metadata
2026-02-13 19:15:38 +08:00 · 2026-02-13 19:15:38 +08:00 · 882dc8592b
commit 882dc8592b
parent 77aff992c1 d7e85d0c25
9 changed files with 574 additions and 59 deletions
--- a/packages/core/src/agent/context-window/compaction-metadata.ts
+++ b/packages/core/src/agent/context-window/compaction-metadata.ts
@ -0,0 +1,156 @@
+/**
+ * Compaction Metadata — extract file operations & tool failures from compacted messages
+ *
+ * Appended to summaries so the agent retains awareness of what files were touched
+ * and which tool invocations failed, even after the original messages are removed.
+ */
+
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+
+// ── Types ──────────────────────────────────────────────────────────────────
+
+/** One failed tool invocation recovered from the messages being compacted. */
+export type ToolFailure = {
+  /** Name taken from the matching `tool_use` block, or "unknown" when none is found. */
+  toolName: string;
+  /** Error text of the failing `tool_result`, truncated to a bounded length. */
+  summary: string;
+};
+
+/** File paths referenced by read/write tool calls in the messages being compacted. */
+export type FileOperations = {
+  /** Paths passed to read tools; paths that were also modified are excluded. */
+  readFiles: string[];
+  /** Paths passed to write/edit tools. */
+  modifiedFiles: string[];
+};
+
+// ── Tool failure extraction ────────────────────────────────────────────────
+
+const MAX_TOOL_FAILURES = 8;
+const ERROR_SUMMARY_MAX_LEN = 240;
+
+/**
+ * Collect failed tool invocations (`tool_result` blocks with `is_error` set) from messages.
+ *
+ * Works in two passes: assistant messages are scanned first to map each `tool_use` id
+ * to its tool name, then user messages are scanned for failing `tool_result` blocks.
+ * Results are deduplicated by tool call id and capped at MAX_TOOL_FAILURES; each error
+ * text is truncated to ERROR_SUMMARY_MAX_LEN characters plus a trailing "...".
+ *
+ * Malformed entries (null or non-object items inside a content array) are skipped
+ * instead of throwing, because message content is untyped at this point.
+ *
+ * @param messages Messages being compacted.
+ * @returns Failures in encounter order; `toolName` is "unknown" when no matching
+ *   `tool_use` block exists in `messages`.
+ */
+export function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
+  const seen = new Set<string>();
+  const failures: ToolFailure[] = [];
+
+  // First pass: collect tool_use names keyed by id
+  const toolNameById = new Map<string, string>();
+  for (const msg of messages) {
+    if (msg.role !== "assistant") continue;
+    const content = (msg as any).content;
+    if (!Array.isArray(content)) continue;
+    for (const block of content) {
+      // Skip null/primitive entries; property access on null would throw.
+      if (block == null || typeof block !== "object") continue;
+      if (block.type === "tool_use" && block.id && block.name) {
+        toolNameById.set(block.id, block.name);
+      }
+    }
+  }
+
+  // Second pass: find is_error tool_result blocks
+  for (const msg of messages) {
+    if (msg.role !== "user") continue;
+    const content = (msg as any).content;
+    if (!Array.isArray(content)) continue;
+    for (const block of content) {
+      if (block == null || typeof block !== "object") continue;
+      if (block.type !== "tool_result") continue;
+      if (!block.is_error) continue;
+
+      // A result without any id cannot be deduplicated or matched, so it is dropped.
+      const toolCallId: string = block.tool_use_id ?? block.id ?? "";
+      if (!toolCallId || seen.has(toolCallId)) continue;
+      seen.add(toolCallId);
+
+      const toolName = toolNameById.get(toolCallId) ?? "unknown";
+      // Result content may be a plain string, an array of strings/text parts, or anything else.
+      let errorText = typeof block.content === "string"
+        ? block.content
+        : Array.isArray(block.content)
+          ? block.content.map((b: any) => (typeof b === "string" ? b : b?.text ?? "")).join(" ")
+          : String(block.content ?? "");
+      if (errorText.length > ERROR_SUMMARY_MAX_LEN) {
+        errorText = errorText.slice(0, ERROR_SUMMARY_MAX_LEN) + "...";
+      }
+
+      failures.push({ toolName, summary: errorText });
+      // Stop scanning as soon as the cap is reached.
+      if (failures.length >= MAX_TOOL_FAILURES) return failures;
+    }
+  }
+
+  return failures;
+}
+
+// ── File operation extraction ──────────────────────────────────────────────
+
+const READ_TOOL_NAMES = new Set(["Read", "read_file"]);
+const WRITE_TOOL_NAMES = new Set(["Write", "Edit", "write_file", "file_edit"]);
+
+/**
+ * Collect file read/modify operations from assistant `tool_use` blocks.
+ *
+ * The file path is taken from the first of `file_path`, `path`, `filePath`, `filename`
+ * on the tool input that holds a non-empty string, so an empty or non-string value in
+ * an earlier field no longer hides a valid path in a later one. Tool names outside
+ * READ_TOOL_NAMES / WRITE_TOOL_NAMES are ignored. Null or non-object entries in a
+ * content array are skipped.
+ *
+ * @param messages Messages being compacted.
+ * @returns Unique paths in first-seen order; `readFiles` excludes any path that also
+ *   appears in `modifiedFiles`.
+ */
+export function collectFileOperations(messages: AgentMessage[]): FileOperations {
+  const readSet = new Set<string>();
+  const modifiedSet = new Set<string>();
+
+  for (const msg of messages) {
+    if (msg.role !== "assistant") continue;
+    const content = (msg as any).content;
+    if (!Array.isArray(content)) continue;
+
+    for (const block of content) {
+      // Skip null/primitive entries; property access on null would throw.
+      if (block == null || typeof block !== "object") continue;
+      if (block.type !== "tool_use") continue;
+      const name: string = block.name ?? "";
+      const input: any = block.input ?? {};
+
+      // Extract file path from common parameter names (first usable string wins)
+      const filePath = [input.file_path, input.path, input.filePath, input.filename].find(
+        (candidate): candidate is string => typeof candidate === "string" && candidate.length > 0,
+      );
+      if (filePath === undefined) continue;
+
+      if (READ_TOOL_NAMES.has(name)) {
+        readSet.add(filePath);
+      } else if (WRITE_TOOL_NAMES.has(name)) {
+        modifiedSet.add(filePath);
+      }
+    }
+  }
+
+  // Remove modified files from readFiles (to avoid duplication)
+  for (const path of modifiedSet) {
+    readSet.delete(path);
+  }
+
+  return {
+    readFiles: [...readSet],
+    modifiedFiles: [...modifiedSet],
+  };
+}
+
+// ── Formatting ─────────────────────────────────────────────────────────────
+
+/**
+ * Render tool failures as a markdown "Tool Failures" section, one bullet per failure.
+ * The result starts with a newline so it can be appended directly to a summary.
+ * Yields an empty string when there is nothing to report.
+ */
+export function formatToolFailuresSection(failures: ToolFailure[]): string {
+  if (failures.length === 0) {
+    return "";
+  }
+
+  let section = "\n## Tool Failures";
+  for (const { toolName, summary } of failures) {
+    section += `\n- **${toolName}**: ${summary}`;
+  }
+  return section;
+}
+
+/**
+ * Render file operations as `<read-files>` / `<modified-files>` XML-style sections,
+ * one path per line. Each emitted section is preceded by a newline so the result can
+ * be appended directly to a summary. Yields an empty string when both lists are empty.
+ */
+export function formatFileOperationsSection(ops: FileOperations): string {
+  const { readFiles, modifiedFiles } = ops;
+  let output = "";
+
+  if (readFiles.length > 0) {
+    output += "\n" + `<read-files>\n${readFiles.join("\n")}\n</read-files>`;
+  }
+  if (modifiedFiles.length > 0) {
+    output += "\n" + `<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`;
+  }
+
+  return output;
+}
--- a/packages/core/src/agent/context-window/index.ts
+++ b/packages/core/src/agent/context-window/index.ts
@ -41,10 +41,24 @@ export {
 export type { SummaryCompactionResult, SummaryCompactionParams } from "./summarization.js";
 export {
  splitMessagesForSummary,
+  detectSplitTurn,
+  computeAdaptiveChunkRatio,
  compactMessagesWithSummary,
  compactMessagesWithChunkedSummary,
 } from "./summarization.js";

+// Summary fallback
+export { summarizeWithFallback } from "./summary-fallback.js";
+
+// Compaction metadata
+export {
+  collectToolFailures,
+  collectFileOperations,
+  formatToolFailuresSection,
+  formatFileOperationsSection,
+} from "./compaction-metadata.js";
+export type { ToolFailure, FileOperations } from "./compaction-metadata.js";
+
 // Tool result pruning
 export type {
  ToolResultPruningSettings,
--- a/packages/core/src/agent/context-window/summarization.ts
+++ b/packages/core/src/agent/context-window/summarization.ts
@ -1,13 +1,22 @@
 /**
 * Summary-based Compaction
 *
- * Uses LLM to generate summaries of historical messages instead of simple truncation
+ * Uses LLM to generate summaries of historical messages instead of simple truncation.
+ * Includes split-turn detection, adaptive chunk sizing, multi-level fallback,
+ * and metadata extraction (file operations + tool failures).
 */

 import type { AgentMessage } from "@mariozechner/pi-agent-core";
-import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
+import { estimateTokens } from "@mariozechner/pi-coding-agent";
 import type { Model } from "@mariozechner/pi-ai";
 import { estimateMessagesTokens } from "./token-estimation.js";
+import { summarizeWithFallback } from "./summary-fallback.js";
+import {
+  collectToolFailures,
+  collectFileOperations,
+  formatToolFailuresSection,
+  formatFileOperationsSection,
+} from "./compaction-metadata.js";

 /** Summary compaction result */
 export type SummaryCompactionResult = {
@ -23,6 +32,10 @@ export type SummaryCompactionResult = {
  summary: string;
  /** Compaction reason */
  reason: "summary";
+  /** File operations extracted from compacted messages */
+  fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined;
+  /** Tool failures extracted from compacted messages */
+  toolFailures?: Array<{ toolName: string; summary: string }> | undefined;
 };

 /** Summary compaction parameters */
@ -49,7 +62,7 @@ export type SummaryCompactionParams = {
  signal?: AbortSignal | undefined;
 };

-/** 默认摘要提示词 */
+/** Default summary instructions */
 const DEFAULT_SUMMARY_INSTRUCTIONS = `Summarize the conversation history concisely, focusing on:
 - Key decisions made
 - Important context and constraints
@ -114,6 +127,102 @@ export function splitMessagesForSummary(
  return { toSummarize, toKeep };
 }

+// ── Split Turn Detection ───────────────────────────────────────────────────
+
+/**
+ * Detect a "split turn": the first kept message is a user message carrying
+ * `tool_result` blocks whose originating assistant `tool_use` landed in `toSummarize`.
+ *
+ * On detection the turn is carved out as `splitPrefix` — everything in `toSummarize`
+ * from the matching assistant message onward, plus the orphaned user message — so it
+ * can be summarized on its own. `adjustedToSummarize` is what precedes the prefix and
+ * `adjustedToKeep` is `toKeep` without its first message, so no message is counted twice.
+ *
+ * @returns The three adjusted arrays, or null when `toKeep` is empty, its first message
+ *   is not a user message with array content containing a `tool_result`, or no assistant
+ *   message in `toSummarize` holds a matching `tool_use`.
+ */
+export function detectSplitTurn(
+  toSummarize: AgentMessage[],
+  toKeep: AgentMessage[],
+): {
+  splitPrefix: AgentMessage[];
+  adjustedToSummarize: AgentMessage[];
+  adjustedToKeep: AgentMessage[];
+} | null {
+  if (toKeep.length === 0) return null;
+
+  const orphan = toKeep[0]!;
+  if (orphan.role !== "user") return null;
+
+  const orphanBlocks = (orphan as any).content;
+  if (!Array.isArray(orphanBlocks)) return null;
+
+  // Gather the ids of every tool_result in the orphaned message (id-less results are ignored).
+  const pendingIds = new Set<unknown>();
+  let sawToolResult = false;
+  for (const part of orphanBlocks) {
+    if (part.type !== "tool_result") continue;
+    sawToolResult = true;
+    const id = part.tool_use_id ?? part.id;
+    if (id) pendingIds.add(id);
+  }
+  if (!sawToolResult) return null;
+
+  // Scan from the newest summarized message backwards for the assistant that issued one of those calls.
+  let anchor = -1;
+  for (let i = toSummarize.length - 1; i >= 0 && anchor < 0; i--) {
+    const candidate = toSummarize[i]!;
+    if (candidate.role !== "assistant") continue;
+
+    const parts = (candidate as any).content;
+    if (!Array.isArray(parts)) continue;
+
+    if (parts.some((p: any) => p.type === "tool_use" && pendingIds.has(p.id))) {
+      anchor = i;
+    }
+  }
+
+  if (anchor < 0) return null;
+
+  return {
+    // The whole turn: assistant tool_use, anything after it, and the orphaned tool_result message.
+    splitPrefix: [...toSummarize.slice(anchor), orphan],
+    // Everything before the turn stays in the main summary input.
+    adjustedToSummarize: toSummarize.slice(0, anchor),
+    adjustedToKeep: toKeep.slice(1),
+  };
+}
+
+// ── Adaptive Chunk Ratio ───────────────────────────────────────────────────
+
+const ADAPTIVE_CHUNK_MIN = 0.15;
+const ADAPTIVE_CHUNK_MAX = 0.4;
+/** Average tokens per message at which the ratio bottoms out at ADAPTIVE_CHUNK_MIN. */
+const ADAPTIVE_CHUNK_AVG_TOKENS_AT_MIN = 10000;
+
+/**
+ * Compute adaptive chunk ratio based on average message token count.
+ * Larger average messages → smaller ratio (to avoid exceeding limits).
+ * Return value range: [0.15, 0.4] — multiply by availableTokens to get chunk size.
+ *
+ * @param messages Messages that will be chunked for summarization.
+ * @returns ADAPTIVE_CHUNK_MAX for an empty list; otherwise a ratio that falls linearly
+ *   with the average tokens per message, clamped to [ADAPTIVE_CHUNK_MIN, ADAPTIVE_CHUNK_MAX].
+ */
+export function computeAdaptiveChunkRatio(
+  messages: AgentMessage[],
+): number {
+  if (messages.length === 0) return ADAPTIVE_CHUNK_MAX;
+
+  const totalTokens = estimateMessagesTokens(messages);
+  const avgTokens = totalTokens / messages.length;
+
+  // Scale inversely: high avg → low ratio (linear between the two bounds)
+  // avgTokens ~500 → ratio ~0.39; ~5000 → ~0.275; 10000+ → 0.15 (clamped)
+  const ratio =
+    ADAPTIVE_CHUNK_MAX -
+    (avgTokens / ADAPTIVE_CHUNK_AVG_TOKENS_AT_MIN) * (ADAPTIVE_CHUNK_MAX - ADAPTIVE_CHUNK_MIN);
+  return Math.max(ADAPTIVE_CHUNK_MIN, Math.min(ADAPTIVE_CHUNK_MAX, ratio));
+}
+
 /**
 * Create summary message
 */
@ -150,7 +259,7 @@ export async function compactMessagesWithSummary(
    signal,
  } = params;

-  // 分割消息
+  // Split messages
  const split = splitMessagesForSummary(messages, availableTokens, {
    targetRatio,
    minKeepMessages,
@ -160,43 +269,90 @@ export async function compactMessagesWithSummary(
    return null;
  }

-  const { toSummarize, toKeep } = split;
+  let { toSummarize, toKeep } = split;

-  // Generate summary
+  // Detect and handle split turn
+  const splitTurn = detectSplitTurn(toSummarize, toKeep);
+  let splitPrefixSummary = "";
+
+  if (splitTurn) {
+    toSummarize = splitTurn.adjustedToSummarize;
+    toKeep = splitTurn.adjustedToKeep;
+
+    // Summarize the split prefix separately
+    const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
+    const prefixResult = await summarizeWithFallback({
+      messages: splitTurn.splitPrefix,
+      model,
+      reserveTokens,
+      apiKey,
+      signal,
+      instructions,
+      previousSummary,
+      availableTokens,
+    });
+    splitPrefixSummary = prefixResult.summary;
+  }
+
+  // Generate summary with fallback (toSummarize no longer contains split prefix messages)
  const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
-  const summary = await generateSummary(
-    toSummarize,
-    model,
-    reserveTokens,
-    apiKey,
-    signal,
-    instructions,
-    previousSummary,
-  );
+  let finalSummary = "";
+
+  if (toSummarize.length > 0) {
+    const { summary } = await summarizeWithFallback({
+      messages: toSummarize,
+      model,
+      reserveTokens,
+      apiKey,
+      signal,
+      instructions,
+      previousSummary,
+      availableTokens,
+    });
+    finalSummary = summary;
+  }
+
+  // Append split prefix summary if present
+  if (splitPrefixSummary) {
+    finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`;
+  }
+
+  // Append metadata sections (all compacted = adjusted toSummarize + splitPrefix)
+  const allCompactedMessages = splitTurn
+    ? [...toSummarize, ...splitTurn.splitPrefix]
+    : toSummarize;
+  const failures = collectToolFailures(allCompactedMessages);
+  const fileOps = collectFileOperations(allCompactedMessages);
+
+  finalSummary += formatToolFailuresSection(failures);
+  finalSummary += formatFileOperationsSection(fileOps);

  // Create summary message
-  const summaryMessage = createSummaryMessage(summary, previousSummary);
+  const summaryMessage = createSummaryMessage(finalSummary, previousSummary);

  // Combine results
  const kept = [summaryMessage, ...toKeep];

-  const tokensRemoved = estimateMessagesTokens(toSummarize);
+  const tokensRemoved = estimateMessagesTokens(allCompactedMessages);
  const tokensKept = estimateMessagesTokens(kept);

  return {
    kept,
-    removedCount: toSummarize.length,
+    removedCount: allCompactedMessages.length,
    tokensRemoved,
    tokensKept,
-    summary,
+    summary: finalSummary,
    reason: "summary",
+    fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined,
+    toolFailures: failures.length > 0 ? failures : undefined,
  };
 }

 /**
 * Generate summary in chunks (for very large history)
 *
- * When history is too large, generate summaries by chunks then merge
+ * When history is too large, generate summaries by chunks then merge.
+ * Uses adaptive chunk sizing and multi-level fallback.
 */
 export async function compactMessagesWithChunkedSummary(
  params: SummaryCompactionParams & {
@ -214,7 +370,6 @@ export async function compactMessagesWithChunkedSummary(
    customInstructions,
    previousSummary,
    signal,
-    maxChunkTokens = 50000,
  } = params;

  // Split messages
@ -227,15 +382,36 @@ export async function compactMessagesWithChunkedSummary(
    return null;
  }

-  const { toSummarize, toKeep } = split;
+  let { toSummarize, toKeep } = split;

-  // If messages to summarize are not many, summarize directly
-  const toSummarizeTokens = estimateMessagesTokens(toSummarize);
-  if (toSummarizeTokens <= maxChunkTokens) {
-    return compactMessagesWithSummary(params);
+  // Detect and handle split turn
+  const splitTurn = detectSplitTurn(toSummarize, toKeep);
+  let splitPrefixSummary = "";
+
+  if (splitTurn) {
+    toSummarize = splitTurn.adjustedToSummarize;
+    toKeep = splitTurn.adjustedToKeep;
+
+    // Summarize the split prefix separately
+    const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
+    const prefixResult = await summarizeWithFallback({
+      messages: splitTurn.splitPrefix,
+      model,
+      reserveTokens,
+      apiKey,
+      signal,
+      instructions,
+      previousSummary,
+      availableTokens,
+    });
+    splitPrefixSummary = prefixResult.summary;
  }

-  // Process in chunks
+  // Compute adaptive chunk size
+  const chunkRatio = computeAdaptiveChunkRatio(toSummarize);
+  const maxChunkTokens = params.maxChunkTokens ?? Math.floor(availableTokens * chunkRatio);
+
+  // Process in chunks (works naturally for single-chunk case too)
  const chunks: AgentMessage[][] = [];
  let currentChunk: AgentMessage[] = [];
  let currentTokens = 0;
@ -257,27 +433,42 @@ export async function compactMessagesWithChunkedSummary(
    chunks.push(currentChunk);
  }

-  // Generate summary for each chunk
+  // Generate summary for each chunk with fallback
  const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
  const chunkSummaries: string[] = [];

  let runningContext = previousSummary;
  for (const chunk of chunks) {
-    const chunkSummary = await generateSummary(
-      chunk,
+    const { summary: chunkSummary } = await summarizeWithFallback({
+      messages: chunk,
      model,
      reserveTokens,
      apiKey,
      signal,
      instructions,
-      runningContext,
-    );
+      previousSummary: runningContext,
+      availableTokens,
+    });
    chunkSummaries.push(chunkSummary);
    runningContext = chunkSummary;
  }

  // Final summary is the last chunk's summary (already includes previous context)
-  const finalSummary = chunkSummaries[chunkSummaries.length - 1] ?? "";
+  let finalSummary = chunkSummaries[chunkSummaries.length - 1] ?? "";
+
+  // Append split prefix summary if present
+  if (splitPrefixSummary) {
+    finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`;
+  }
+
+  // Append metadata sections (all compacted = adjusted toSummarize + splitPrefix)
+  const allCompactedMessages = splitTurn
+    ? [...toSummarize, ...splitTurn.splitPrefix]
+    : toSummarize;
+  const failures = collectToolFailures(allCompactedMessages);
+  const fileOps = collectFileOperations(allCompactedMessages);
+  finalSummary += formatToolFailuresSection(failures);
+  finalSummary += formatFileOperationsSection(fileOps);

  // Create summary message
  const summaryMessage = createSummaryMessage(finalSummary);
@ -285,15 +476,17 @@ export async function compactMessagesWithChunkedSummary(
  // Combine results
  const kept = [summaryMessage, ...toKeep];

-  const tokensRemoved = estimateMessagesTokens(toSummarize);
+  const tokensRemoved = estimateMessagesTokens(allCompactedMessages);
  const tokensKept = estimateMessagesTokens(kept);

  return {
    kept,
-    removedCount: toSummarize.length,
+    removedCount: allCompactedMessages.length,
    tokensRemoved,
    tokensKept,
    summary: finalSummary,
    reason: "summary",
+    fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined,
+    toolFailures: failures.length > 0 ? failures : undefined,
  };
 }
--- a/packages/core/src/agent/context-window/summary-fallback.ts
+++ b/packages/core/src/agent/context-window/summary-fallback.ts
@ -0,0 +1,128 @@
+/**
+ * Summary Fallback — multi-level degradation for summary compaction
+ *
+ * Level 1: Full LLM summary via generateSummary()
+ * Level 2: Exclude oversized messages (> 50% context window), retry summary
+ * Level 3: Plain-text fallback summary (with metadata: file ops + tool failures)
+ */
+
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
+import type { Model } from "@mariozechner/pi-ai";
+import {
+  collectToolFailures,
+  collectFileOperations,
+  formatToolFailuresSection,
+  formatFileOperationsSection,
+} from "./compaction-metadata.js";
+
+/** Inputs accepted by summarizeWithFallback. */
+export type SummarizeWithFallbackParams = {
+  /** Messages to summarize */
+  messages: AgentMessage[];
+  /** LLM model */
+  model: Model<any>;
+  /** Max tokens reserved for summary output */
+  reserveTokens: number;
+  /** API key */
+  apiKey: string;
+  /** AbortSignal */
+  signal?: AbortSignal | undefined;
+  /** Summary instructions */
+  instructions: string;
+  /** Previous summary for incremental context */
+  previousSummary?: string | undefined;
+  /** Available context window tokens (used for oversized-message filtering) */
+  availableTokens: number;
+};
+
+/**
+ * Attempt to generate an LLM summary with multi-level fallback.
+ *
+ * Level 1 summarizes all messages. If that throws, Level 2 retries with every message
+ * whose estimated size exceeds 50% of `availableTokens` removed — but only when that
+ * filter actually drops something and leaves something, and only when `signal` has not
+ * been aborted (retrying an LLM call on an aborted signal cannot succeed). Level 3
+ * builds a plain-text summary from extracted metadata without calling the LLM.
+ *
+ * Errors from the LLM calls are logged with console.warn and never rethrown.
+ * NOTE(review): an aborted `signal` therefore still produces a Level 3 result instead
+ * of propagating the cancellation; callers that must distinguish this need to check
+ * `signal.aborted` themselves — confirm this is the intended contract.
+ *
+ * @returns { summary, level } where level indicates which fallback tier succeeded:
+ *   1 = full summary, 2 = filtered summary, 3 = plain-text fallback
+ */
+export async function summarizeWithFallback(
+  params: SummarizeWithFallbackParams,
+): Promise<{ summary: string; level: 1 | 2 | 3 }> {
+  const {
+    messages,
+    model,
+    reserveTokens,
+    apiKey,
+    signal,
+    instructions,
+    previousSummary,
+    availableTokens,
+  } = params;
+
+  // Levels 1 and 2 issue the same LLM call; only the message list differs.
+  const requestSummary = (subset: AgentMessage[]): Promise<string> =>
+    generateSummary(
+      subset,
+      model,
+      reserveTokens,
+      apiKey,
+      signal,
+      instructions,
+      previousSummary,
+    );
+
+  // ── Level 1: Full summary ────────────────────────────────────────────
+  try {
+    const summary = await requestSummary(messages);
+    return { summary, level: 1 };
+  } catch (err) {
+    console.warn(`[summary-fallback] Level 1 (full summary) failed: ${err}`);
+  }
+
+  // ── Level 2: Exclude oversized messages, retry ───────────────────────
+  // Skipped after cancellation: a second request on an aborted signal is wasted work.
+  if (!signal?.aborted) {
+    const oversizeThreshold = availableTokens * 0.5;
+    const filtered = messages.filter((msg) => estimateTokens(msg) <= oversizeThreshold);
+
+    // Retry only if filtering changed the input and left something to summarize.
+    if (filtered.length > 0 && filtered.length < messages.length) {
+      try {
+        const summary = await requestSummary(filtered);
+        return { summary, level: 2 };
+      } catch (err) {
+        console.warn(`[summary-fallback] Level 2 (filtered summary) failed: ${err}`);
+      }
+    }
+  }
+
+  // ── Level 3: Plain-text fallback with metadata ───────────────────────
+  const summary = buildPlainTextFallback(messages, previousSummary);
+  return { summary, level: 3 };
+}
+
+/**
+ * Assemble the no-LLM fallback summary: the previous summary (when there is one),
+ * a note stating how many messages were compacted, then the tool-failure and
+ * file-operation sections extracted from those messages.
+ */
+function buildPlainTextFallback(
+  messages: AgentMessage[],
+  previousSummary?: string,
+): string {
+  const compactionNote =
+    `## Compaction Note\nLLM summarization was unavailable. ${messages.length} messages were compacted. ` +
+    `Below is automatically extracted metadata from the removed messages.`;
+
+  // Previous context, if any, goes first and is separated by a blank line.
+  const header = previousSummary
+    ? `## Previous Context\n${previousSummary}` + "\n\n" + compactionNote
+    : compactionNote;
+
+  const toolFailures = collectToolFailures(messages);
+  const fileOperations = collectFileOperations(messages);
+
+  // Both formatters return either "" or a string that begins with "\n",
+  // so plain concatenation produces the right spacing.
+  return (
+    header +
+    formatToolFailuresSection(toolFailures) +
+    formatFileOperationsSection(fileOperations)
+  );
+}
--- a/packages/core/src/agent/events.ts
+++ b/packages/core/src/agent/events.ts
@ -24,6 +24,8 @@ export type CompactionEndEvent = {
  tokensRemoved?: number | undefined;
  tokensKept?: number | undefined;
  reason: "count" | "tokens" | "summary" | "pruning";
+  /** Generated summary text (only present when reason is "summary") */
+  summary?: string | undefined;
 };

 /** Emitted when an agent encounters an error during execution */
--- a/packages/core/src/agent/runner.ts
+++ b/packages/core/src/agent/runner.ts
@ -271,12 +271,12 @@ export class Agent {
      );
    }

-    // 确定 compaction 模式
-    const compactionMode = options.compactionMode ?? "tokens"; // 默认使用 token 模式
+    // Determine compaction mode (default: summary with LLM-based summarization)
+    const compactionMode = options.compactionMode ?? "summary";

-    // 获取 API Key（用于 summary 模式）
+    // Resolve API key for summary mode (reuse the agent's own key)
    const summaryApiKey = compactionMode === "summary"
-      ? resolveApiKey(this.resolvedProvider, options.apiKey)
+      ? (resolveApiKey(this.resolvedProvider, options.apiKey) ?? this.currentApiKey)
      : undefined;

    // Store reserveTokens for pre-flight compaction
@ -292,7 +292,7 @@ export class Agent {
      reserveTokens: options.reserveTokens,
      targetRatio: options.compactionTargetRatio,
      minKeepMessages: options.minKeepMessages,
-      // Summary 模式参数
+      // Summary mode parameters
      model: compactionMode === "summary" ? model : undefined,
      apiKey: summaryApiKey,
      customInstructions: options.summaryInstructions,
@ -764,6 +764,7 @@ export class Agent {
      tokensRemoved: result.tokensRemoved,
      tokensKept: result.tokensKept,
      reason: result.reason ?? "tokens",
+      summary: result.summary,
    };
    this.emitMulticaEvent(endEvent);
  }
--- a/packages/core/src/agent/session/compaction.ts
+++ b/packages/core/src/agent/session/compaction.ts
@ -19,6 +19,10 @@ export type CompactionResult = {
  tokensKept?: number | undefined;
  /** Summary generated in summary mode */
  summary?: string | undefined;
+  /** File operations extracted from compacted messages */
+  fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined;
+  /** Tool failures extracted from compacted messages */
+  toolFailures?: Array<{ toolName: string; summary: string }> | undefined;
  /** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */
  reason: "count" | "tokens" | "summary" | "pruning";
 };
@ -249,6 +253,8 @@ export async function compactMessagesAsync(
    tokensRemoved: result.tokensRemoved,
    tokensKept: result.tokensKept,
    summary: result.summary,
+    fileOperations: result.fileOperations,
+    toolFailures: result.toolFailures,
    reason: "summary",
  };
 }
--- a/packages/core/src/agent/session/session-manager.ts
+++ b/packages/core/src/agent/session/session-manager.ts
@ -95,8 +95,8 @@ export class SessionManager {
    this.sessionId = options.sessionId;
    this.baseDir = options.baseDir;

-    // Compaction mode
-    this.compactionMode = options.compactionMode ?? "count";
+    // Compaction mode (default: summary with LLM-based summarization)
+    this.compactionMode = options.compactionMode ?? "summary";

    // Count mode parameters
    this.maxMessages = options.maxMessages ?? 80;
@ -312,22 +312,37 @@ export class SessionManager {
          minKeepMessages: this.minKeepMessages,
        });
      } else {
-        result = await compactMessagesAsync(workingMessages, {
-          mode: "summary",
-          model,
-          apiKey,
-          contextWindowTokens: this.contextWindowTokens,
-          systemPrompt: this.systemPrompt,
-          reserveTokens: this.reserveTokens,
-          targetRatio: this.targetRatio,
-          minKeepMessages: this.minKeepMessages,
-          customInstructions: this.customInstructions,
-          previousSummary: this.previousSummary,
-        });
+        try {
+          result = await compactMessagesAsync(workingMessages, {
+            mode: "summary",
+            model,
+            apiKey,
+            contextWindowTokens: this.contextWindowTokens,
+            systemPrompt: this.systemPrompt,
+            reserveTokens: this.reserveTokens,
+            targetRatio: this.targetRatio,
+            minKeepMessages: this.minKeepMessages,
+            customInstructions: this.customInstructions,
+            previousSummary: this.previousSummary,
+          });

-        // Save summary for next incremental update
-        if (result?.summary) {
-          this.previousSummary = result.summary;
+          // Save summary for next incremental update
+          if (result?.summary) {
+            this.previousSummary = result.summary;
+          }
+        } catch (err) {
+          // Summary compaction failed entirely — fall back to tokens mode
+          console.error(
+            `[SessionManager] Summary compaction failed, falling back to tokens mode: ${err}`,
+          );
+          result = compactMessages(workingMessages, {
+            mode: "tokens",
+            contextWindowTokens: this.contextWindowTokens,
+            systemPrompt: this.systemPrompt,
+            reserveTokens: this.reserveTokens,
+            targetRatio: this.targetRatio,
+            minKeepMessages: this.minKeepMessages,
+          });
        }
      }
    } else {
--- a/packages/core/src/agent/types.ts
+++ b/packages/core/src/agent/types.ts
@ -51,8 +51,8 @@ export type AgentOptions = {
  /**
   * Compaction mode:
   * - "count": uses legacy message count
-   * - "tokens": uses token awareness (default)
-   * - "summary": uses LLM to generate summary
+   * - "tokens": uses token awareness
+   * - "summary": uses LLM to generate summary (default)
   */
  compactionMode?: "count" | "tokens" | "summary" | undefined;
  /** Compaction target utilization ratio (0-1), defaults to 0.5 */