From ba44de89b712693c951bcc5c3c72258c974dc2d2 Mon Sep 17 00:00:00 2001 From: yushen Date: Fri, 13 Feb 2026 13:26:22 +0800 Subject: [PATCH 1/3] feat(compaction): add metadata extraction and multi-level summary fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit New modules for improved compaction: - compaction-metadata.ts: extracts file operations (read/modified) and tool failures from compacted messages, appended to summaries for context retention - summary-fallback.ts: 3-level degradation chain (full LLM summary → filtered summary excluding oversized messages → plain-text fallback with metadata) Co-Authored-By: Claude Opus 4.6 --- .../context-window/compaction-metadata.ts | 156 ++++++++++++++++++ .../agent/context-window/summary-fallback.ts | 128 ++++++++++++++ 2 files changed, 284 insertions(+) create mode 100644 packages/core/src/agent/context-window/compaction-metadata.ts create mode 100644 packages/core/src/agent/context-window/summary-fallback.ts diff --git a/packages/core/src/agent/context-window/compaction-metadata.ts b/packages/core/src/agent/context-window/compaction-metadata.ts new file mode 100644 index 00000000..ca3eb5fa --- /dev/null +++ b/packages/core/src/agent/context-window/compaction-metadata.ts @@ -0,0 +1,156 @@ +/** + * Compaction Metadata — extract file operations & tool failures from compacted messages + * + * Appended to summaries so the agent retains awareness of what files were touched + * and which tool invocations failed, even after the original messages are removed. + */ + +import type { AgentMessage } from "@mariozechner/pi-agent-core"; + +// ── Types ────────────────────────────────────────────────────────────────── + +export type ToolFailure = { + toolName: string; + summary: string; +}; + +export type FileOperations = { + readFiles: string[]; + modifiedFiles: string[]; +}; + +// ── Tool failure extraction ──────────────────────────────────────────────── + +const MAX_TOOL_FAILURES = 8; +const ERROR_SUMMARY_MAX_LEN = 240; + +/** + * Collect tool failures (is_error: true tool_result blocks) from messages. + * Deduplicates by toolCallId and caps at MAX_TOOL_FAILURES. + */ +export function collectToolFailures(messages: AgentMessage[]): ToolFailure[] { + const seen = new Set(); + const failures: ToolFailure[] = []; + + // First pass: collect tool_use names keyed by id + const toolNameById = new Map(); + for (const msg of messages) { + if (msg.role !== "assistant") continue; + const content = (msg as any).content; + if (!Array.isArray(content)) continue; + for (const block of content) { + if (block.type === "tool_use" && block.id && block.name) { + toolNameById.set(block.id, block.name); + } + } + } + + // Second pass: find is_error tool_result blocks + for (const msg of messages) { + if (msg.role !== "user") continue; + const content = (msg as any).content; + if (!Array.isArray(content)) continue; + for (const block of content) { + if (block.type !== "tool_result") continue; + if (!block.is_error) continue; + + const toolCallId: string = block.tool_use_id ?? block.id ?? ""; + if (!toolCallId || seen.has(toolCallId)) continue; + seen.add(toolCallId); + + const toolName = toolNameById.get(toolCallId) ?? "unknown"; + let errorText = typeof block.content === "string" + ? block.content + : Array.isArray(block.content) + ? block.content.map((b: any) => (typeof b === "string" ? b : b.text ?? "")).join(" ") + : String(block.content ?? ""); + if (errorText.length > ERROR_SUMMARY_MAX_LEN) { + errorText = errorText.slice(0, ERROR_SUMMARY_MAX_LEN) + "..."; + } + + failures.push({ toolName, summary: errorText }); + if (failures.length >= MAX_TOOL_FAILURES) return failures; + } + } + + return failures; +} + +// ── File operation extraction ────────────────────────────────────────────── + +const READ_TOOL_NAMES = new Set(["Read", "read_file"]); +const WRITE_TOOL_NAMES = new Set(["Write", "Edit", "write_file", "file_edit"]); + +/** + * Collect file read/modify operations from assistant tool_use blocks. + * readFiles excludes any path that also appears in modifiedFiles. + */ +export function collectFileOperations(messages: AgentMessage[]): FileOperations { + const readSet = new Set(); + const modifiedSet = new Set(); + + for (const msg of messages) { + if (msg.role !== "assistant") continue; + const content = (msg as any).content; + if (!Array.isArray(content)) continue; + + for (const block of content) { + if (block.type !== "tool_use") continue; + const name: string = block.name ?? ""; + const input: any = block.input ?? {}; + + // Extract file path from common parameter names + const filePath: string | undefined = + input.file_path ?? input.path ?? input.filePath ?? input.filename; + if (!filePath || typeof filePath !== "string") continue; + + if (READ_TOOL_NAMES.has(name)) { + readSet.add(filePath); + } else if (WRITE_TOOL_NAMES.has(name)) { + modifiedSet.add(filePath); + } + } + } + + // Remove modified files from readFiles (to avoid duplication) + for (const path of modifiedSet) { + readSet.delete(path); + } + + return { + readFiles: [...readSet], + modifiedFiles: [...modifiedSet], + }; +} + +// ── Formatting ───────────────────────────────────────────────────────────── + +/** + * Format tool failures as a markdown section. + * Returns empty string if no failures. + */ +export function formatToolFailuresSection(failures: ToolFailure[]): string { + if (failures.length === 0) return ""; + + const lines = failures.map( + (f) => `- **${f.toolName}**: ${f.summary}`, + ); + return `\n## Tool Failures\n${lines.join("\n")}`; +} + +/** + * Format file operations as XML sections. + * Returns empty string if no operations. + */ +export function formatFileOperationsSection(ops: FileOperations): string { + const parts: string[] = []; + + if (ops.readFiles.length > 0) { + parts.push(`\n${ops.readFiles.join("\n")}\n`); + } + if (ops.modifiedFiles.length > 0) { + parts.push(`\n${ops.modifiedFiles.join("\n")}\n`); + } + + return parts.length > 0 ? "\n" + parts.join("\n") : ""; +} diff --git a/packages/core/src/agent/context-window/summary-fallback.ts b/packages/core/src/agent/context-window/summary-fallback.ts new file mode 100644 index 00000000..485c2eb1 --- /dev/null +++ b/packages/core/src/agent/context-window/summary-fallback.ts @@ -0,0 +1,128 @@ +/** + * Summary Fallback — multi-level degradation for summary compaction + * + * Level 1: Full LLM summary via generateSummary() + * Level 2: Exclude oversized messages (> 50% context window), retry summary + * Level 3: Plain-text fallback summary (with metadata: file ops + tool failures) + */ + +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent"; +import type { Model } from "@mariozechner/pi-ai"; +import { + collectToolFailures, + collectFileOperations, + formatToolFailuresSection, + formatFileOperationsSection, +} from "./compaction-metadata.js"; + +export type SummarizeWithFallbackParams = { + /** Messages to summarize */ + messages: AgentMessage[]; + /** LLM model */ + model: Model; + /** Max tokens reserved for summary output */ + reserveTokens: number; + /** API key */ + apiKey: string; + /** AbortSignal */ + signal?: AbortSignal | undefined; + /** Summary instructions */ + instructions: string; + /** Previous summary for incremental context */ + previousSummary?: string | undefined; + /** Available context window tokens (used for oversized-message filtering) */ + availableTokens: number; +}; + +/** + * Attempt to generate an LLM summary with multi-level fallback. + * + * Returns { summary, level } where level indicates which fallback tier succeeded: + * 1 = full summary, 2 = filtered summary, 3 = plain-text fallback + */ +export async function summarizeWithFallback( + params: SummarizeWithFallbackParams, +): Promise<{ summary: string; level: 1 | 2 | 3 }> { + const { + messages, + model, + reserveTokens, + apiKey, + signal, + instructions, + previousSummary, + availableTokens, + } = params; + + // ── Level 1: Full summary ──────────────────────────────────────────── + try { + const summary = await generateSummary( + messages, + model, + reserveTokens, + apiKey, + signal, + instructions, + previousSummary, + ); + return { summary, level: 1 }; + } catch (err) { + console.warn(`[summary-fallback] Level 1 (full summary) failed: ${err}`); + } + + // ── Level 2: Exclude oversized messages, retry ─────────────────────── + const oversizeThreshold = availableTokens * 0.5; + const filtered = messages.filter((msg) => estimateTokens(msg) <= oversizeThreshold); + + if (filtered.length > 0 && filtered.length < messages.length) { + try { + const summary = await generateSummary( + filtered, + model, + reserveTokens, + apiKey, + signal, + instructions, + previousSummary, + ); + return { summary, level: 2 }; + } catch (err) { + console.warn(`[summary-fallback] Level 2 (filtered summary) failed: ${err}`); + } + } + + // ── Level 3: Plain-text fallback with metadata ─────────────────────── + const summary = buildPlainTextFallback(messages, previousSummary); + return { summary, level: 3 }; +} + +/** + * Build a plain-text fallback summary from metadata extraction only (no LLM). + */ +function buildPlainTextFallback( + messages: AgentMessage[], + previousSummary?: string, +): string { + const parts: string[] = []; + + if (previousSummary) { + parts.push(`## Previous Context\n${previousSummary}`); + } + + parts.push( + `## Compaction Note\nLLM summarization was unavailable. ${messages.length} messages were compacted. ` + + `Below is automatically extracted metadata from the removed messages.`, + ); + + // Extract and append metadata + const failures = collectToolFailures(messages); + const fileOps = collectFileOperations(messages); + const failureSection = formatToolFailuresSection(failures); + const fileOpsSection = formatFileOperationsSection(fileOps); + + if (failureSection) parts.push(failureSection); + if (fileOpsSection) parts.push(fileOpsSection); + + return parts.join("\n\n"); +} From 9e7907a81464ce26ae582e62be551975ab8b947c Mon Sep 17 00:00:00 2001 From: yushen Date: Fri, 13 Feb 2026 13:26:34 +0800 Subject: [PATCH 2/3] feat(compaction): switch default to summary mode with split-turn and adaptive chunking - Default compaction mode changed from "tokens" to "summary" (LLM-based) - Split turn detection: handles orphaned tool_result messages at compaction boundary by summarizing the split turn prefix separately - Adaptive chunk sizing: dynamically adjusts chunk ratio (0.15-0.4) based on average message token count instead of fixed 50k tokens - Fallback chain: summary compaction wraps in try/catch, falling back to tokens mode on complete failure; summarizeWithFallback provides 3-level degradation within summary mode itself - Metadata appended to summaries: file operations + tool failures sections - CompactionResult extended with fileOperations and toolFailures fields - CompactionEndEvent gains optional summary field - API key resolution improved: reuses agent's own key for summary generation Backward compatible: explicit "count" or "tokens" mode behaves identically. Co-Authored-By: Claude Opus 4.6 --- .../core/src/agent/context-window/index.ts | 14 + .../src/agent/context-window/summarization.ts | 283 ++++++++++++++++-- packages/core/src/agent/events.ts | 2 + packages/core/src/agent/runner.ts | 11 +- packages/core/src/agent/session/compaction.ts | 6 + .../core/src/agent/session/session-manager.ts | 49 +-- packages/core/src/agent/types.ts | 4 +- 7 files changed, 319 insertions(+), 50 deletions(-) diff --git a/packages/core/src/agent/context-window/index.ts b/packages/core/src/agent/context-window/index.ts index 440d6b28..8d04b64f 100644 --- a/packages/core/src/agent/context-window/index.ts +++ b/packages/core/src/agent/context-window/index.ts @@ -41,10 +41,24 @@ export { export type { SummaryCompactionResult, SummaryCompactionParams } from "./summarization.js"; export { splitMessagesForSummary, + detectSplitTurn, + computeAdaptiveChunkRatio, compactMessagesWithSummary, compactMessagesWithChunkedSummary, } from "./summarization.js"; +// Summary fallback +export { summarizeWithFallback } from "./summary-fallback.js"; + +// Compaction metadata +export { + collectToolFailures, + collectFileOperations, + formatToolFailuresSection, + formatFileOperationsSection, +} from "./compaction-metadata.js"; +export type { ToolFailure, FileOperations } from "./compaction-metadata.js"; + // Tool result pruning export type { ToolResultPruningSettings, diff --git a/packages/core/src/agent/context-window/summarization.ts b/packages/core/src/agent/context-window/summarization.ts index dbb69f85..c859bcaf 100644 --- a/packages/core/src/agent/context-window/summarization.ts +++ b/packages/core/src/agent/context-window/summarization.ts @@ -1,13 +1,22 @@ /** * Summary-based Compaction * - * Uses LLM to generate summaries of historical messages instead of simple truncation + * Uses LLM to generate summaries of historical messages instead of simple truncation. + * Includes split-turn detection, adaptive chunk sizing, multi-level fallback, + * and metadata extraction (file operations + tool failures). */ import type { AgentMessage } from "@mariozechner/pi-agent-core"; -import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent"; +import { estimateTokens } from "@mariozechner/pi-coding-agent"; import type { Model } from "@mariozechner/pi-ai"; import { estimateMessagesTokens } from "./token-estimation.js"; +import { summarizeWithFallback } from "./summary-fallback.js"; +import { + collectToolFailures, + collectFileOperations, + formatToolFailuresSection, + formatFileOperationsSection, +} from "./compaction-metadata.js"; /** Summary compaction result */ export type SummaryCompactionResult = { @@ -23,6 +32,10 @@ export type SummaryCompactionResult = { summary: string; /** Compaction reason */ reason: "summary"; + /** File operations extracted from compacted messages */ + fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined; + /** Tool failures extracted from compacted messages */ + toolFailures?: Array<{ toolName: string; summary: string }> | undefined; }; /** Summary compaction parameters */ @@ -49,7 +62,7 @@ export type SummaryCompactionParams = { signal?: AbortSignal | undefined; }; -/** 默认摘要提示词 */ +/** Default summary instructions */ const DEFAULT_SUMMARY_INSTRUCTIONS = `Summarize the conversation history concisely, focusing on: - Key decisions made - Important context and constraints @@ -114,6 +127,99 @@ export function splitMessagesForSummary( return { toSummarize, toKeep }; } +// ── Split Turn Detection ─────────────────────────────────────────────────── + +/** + * Detect and fix a "split turn" — when the first kept message is a user message + * containing tool_result blocks without the corresponding assistant tool_use. + * + * When detected, moves the orphaned tool_result (and its preceding assistant + * tool_use) from toKeep back to toSummarize so they can be summarized together. + * + * Returns { splitPrefix } containing the separated turn prefix messages, + * or null if no split turn was detected. The caller should summarize the + * prefix separately and prepend it. + */ +export function detectSplitTurn( + toSummarize: AgentMessage[], + toKeep: AgentMessage[], +): { splitPrefix: AgentMessage[]; adjustedToKeep: AgentMessage[] } | null { + if (toKeep.length === 0) return null; + + const firstKept = toKeep[0]!; + if (firstKept.role !== "user") return null; + + // Check if this user message has tool_result blocks + const content = (firstKept as any).content; + if (!Array.isArray(content)) return null; + + const hasToolResult = content.some((b: any) => b.type === "tool_result"); + if (!hasToolResult) return null; + + // This is an orphaned tool_result — look back in toSummarize for the assistant tool_use + // Find the last assistant message in toSummarize that has matching tool_use + const toolResultIds = new Set( + content + .filter((b: any) => b.type === "tool_result") + .map((b: any) => b.tool_use_id ?? b.id) + .filter(Boolean), + ); + + // Walk backwards through toSummarize to find the assistant with matching tool_use + let assistantIndex = -1; + for (let i = toSummarize.length - 1; i >= 0; i--) { + const msg = toSummarize[i]!; + if (msg.role !== "assistant") continue; + + const assistantContent = (msg as any).content; + if (!Array.isArray(assistantContent)) continue; + + const hasMatchingToolUse = assistantContent.some( + (b: any) => b.type === "tool_use" && toolResultIds.has(b.id), + ); + if (hasMatchingToolUse) { + assistantIndex = i; + break; + } + } + + if (assistantIndex < 0) return null; + + // Separate the split prefix: assistant tool_use + orphaned user tool_result + const splitPrefix = [ + ...toSummarize.slice(assistantIndex), + firstKept, + ]; + const adjustedToKeep = toKeep.slice(1); + + return { splitPrefix, adjustedToKeep }; +} + +// ── Adaptive Chunk Ratio ─────────────────────────────────────────────────── + +const ADAPTIVE_CHUNK_MIN = 0.15; +const ADAPTIVE_CHUNK_MAX = 0.4; + +/** + * Compute adaptive chunk size based on average message token count. + * Larger average messages → smaller chunks (to avoid exceeding limits). + * Range: [0.15, 0.4] × contextWindow + */ +export function computeAdaptiveChunkRatio( + messages: AgentMessage[], + contextWindow: number, +): number { + if (messages.length === 0) return ADAPTIVE_CHUNK_MAX; + + const totalTokens = estimateMessagesTokens(messages); + const avgTokens = totalTokens / messages.length; + + // Scale inversely: high avg → low ratio + // avgTokens ~500 → ratio ~0.4; avgTokens ~5000+ → ratio ~0.15 + const ratio = ADAPTIVE_CHUNK_MAX - (avgTokens / 10000) * (ADAPTIVE_CHUNK_MAX - ADAPTIVE_CHUNK_MIN); + return Math.max(ADAPTIVE_CHUNK_MIN, Math.min(ADAPTIVE_CHUNK_MAX, ratio)); +} + /** * Create summary message */ @@ -150,7 +256,7 @@ export async function compactMessagesWithSummary( signal, } = params; - // 分割消息 + // Split messages const split = splitMessagesForSummary(messages, availableTokens, { targetRatio, minKeepMessages, @@ -160,43 +266,85 @@ export async function compactMessagesWithSummary( return null; } - const { toSummarize, toKeep } = split; + let { toSummarize, toKeep } = split; - // Generate summary + // Detect and handle split turn + const splitTurn = detectSplitTurn(toSummarize, toKeep); + let splitPrefixSummary = ""; + + if (splitTurn) { + toKeep = splitTurn.adjustedToKeep; + + // Summarize the split prefix separately + const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; + const prefixResult = await summarizeWithFallback({ + messages: splitTurn.splitPrefix, + model, + reserveTokens, + apiKey, + signal, + instructions, + previousSummary, + availableTokens, + }); + splitPrefixSummary = prefixResult.summary; + } + + // Generate summary with fallback const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; - const summary = await generateSummary( - toSummarize, + const { summary } = await summarizeWithFallback({ + messages: toSummarize, model, reserveTokens, apiKey, signal, instructions, previousSummary, - ); + availableTokens, + }); + + // Append split prefix summary if present + let finalSummary = summary; + if (splitPrefixSummary) { + finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`; + } + + // Append metadata sections + const allCompactedMessages = splitTurn + ? [...toSummarize, ...splitTurn.splitPrefix] + : toSummarize; + const failures = collectToolFailures(allCompactedMessages); + const fileOps = collectFileOperations(allCompactedMessages); + + finalSummary += formatToolFailuresSection(failures); + finalSummary += formatFileOperationsSection(fileOps); // Create summary message - const summaryMessage = createSummaryMessage(summary, previousSummary); + const summaryMessage = createSummaryMessage(finalSummary, previousSummary); // Combine results const kept = [summaryMessage, ...toKeep]; - const tokensRemoved = estimateMessagesTokens(toSummarize); + const tokensRemoved = estimateMessagesTokens(allCompactedMessages); const tokensKept = estimateMessagesTokens(kept); return { kept, - removedCount: toSummarize.length, + removedCount: allCompactedMessages.length, tokensRemoved, tokensKept, - summary, + summary: finalSummary, reason: "summary", + fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined, + toolFailures: failures.length > 0 ? failures : undefined, }; } /** * Generate summary in chunks (for very large history) * - * When history is too large, generate summaries by chunks then merge + * When history is too large, generate summaries by chunks then merge. + * Uses adaptive chunk sizing and multi-level fallback. */ export async function compactMessagesWithChunkedSummary( params: SummaryCompactionParams & { @@ -214,7 +362,6 @@ export async function compactMessagesWithChunkedSummary( customInstructions, previousSummary, signal, - maxChunkTokens = 50000, } = params; // Split messages @@ -227,12 +374,79 @@ export async function compactMessagesWithChunkedSummary( return null; } - const { toSummarize, toKeep } = split; + let { toSummarize, toKeep } = split; - // If messages to summarize are not many, summarize directly + // Detect and handle split turn + const splitTurn = detectSplitTurn(toSummarize, toKeep); + let splitPrefixSummary = ""; + + if (splitTurn) { + toKeep = splitTurn.adjustedToKeep; + + // Summarize the split prefix separately + const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; + const prefixResult = await summarizeWithFallback({ + messages: splitTurn.splitPrefix, + model, + reserveTokens, + apiKey, + signal, + instructions, + previousSummary, + availableTokens, + }); + splitPrefixSummary = prefixResult.summary; + } + + // Compute adaptive chunk size + const chunkRatio = computeAdaptiveChunkRatio(toSummarize, availableTokens); + const maxChunkTokens = params.maxChunkTokens ?? Math.floor(availableTokens * chunkRatio); + + // If messages to summarize fit in one chunk, delegate to single-pass compaction const toSummarizeTokens = estimateMessagesTokens(toSummarize); if (toSummarizeTokens <= maxChunkTokens) { - return compactMessagesWithSummary(params); + // For single-chunk, use the non-chunked path but still handle split turn + const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; + const { summary } = await summarizeWithFallback({ + messages: toSummarize, + model, + reserveTokens, + apiKey, + signal, + instructions, + previousSummary, + availableTokens, + }); + + let finalSummary = summary; + if (splitPrefixSummary) { + finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`; + } + + // Append metadata + const allCompactedMessages = splitTurn + ? [...toSummarize, ...splitTurn.splitPrefix] + : toSummarize; + const failures = collectToolFailures(allCompactedMessages); + const fileOps = collectFileOperations(allCompactedMessages); + finalSummary += formatToolFailuresSection(failures); + finalSummary += formatFileOperationsSection(fileOps); + + const summaryMessage = createSummaryMessage(finalSummary); + const kept = [summaryMessage, ...toKeep]; + const tokensRemoved = estimateMessagesTokens(allCompactedMessages); + const tokensKept = estimateMessagesTokens(kept); + + return { + kept, + removedCount: allCompactedMessages.length, + tokensRemoved, + tokensKept, + summary: finalSummary, + reason: "summary", + fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined, + toolFailures: failures.length > 0 ? failures : undefined, + }; } // Process in chunks @@ -257,27 +471,42 @@ export async function compactMessagesWithChunkedSummary( chunks.push(currentChunk); } - // Generate summary for each chunk + // Generate summary for each chunk with fallback const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; const chunkSummaries: string[] = []; let runningContext = previousSummary; for (const chunk of chunks) { - const chunkSummary = await generateSummary( - chunk, + const { summary: chunkSummary } = await summarizeWithFallback({ + messages: chunk, model, reserveTokens, apiKey, signal, instructions, - runningContext, - ); + previousSummary: runningContext, + availableTokens, + }); chunkSummaries.push(chunkSummary); runningContext = chunkSummary; } // Final summary is the last chunk's summary (already includes previous context) - const finalSummary = chunkSummaries[chunkSummaries.length - 1] ?? ""; + let finalSummary = chunkSummaries[chunkSummaries.length - 1] ?? ""; + + // Append split prefix summary if present + if (splitPrefixSummary) { + finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`; + } + + // Append metadata sections + const allCompactedMessages = splitTurn + ? [...toSummarize, ...splitTurn.splitPrefix] + : toSummarize; + const failures = collectToolFailures(allCompactedMessages); + const fileOps = collectFileOperations(allCompactedMessages); + finalSummary += formatToolFailuresSection(failures); + finalSummary += formatFileOperationsSection(fileOps); // Create summary message const summaryMessage = createSummaryMessage(finalSummary); @@ -285,15 +514,17 @@ export async function compactMessagesWithChunkedSummary( // Combine results const kept = [summaryMessage, ...toKeep]; - const tokensRemoved = estimateMessagesTokens(toSummarize); + const tokensRemoved = estimateMessagesTokens(allCompactedMessages); const tokensKept = estimateMessagesTokens(kept); return { kept, - removedCount: toSummarize.length, + removedCount: allCompactedMessages.length, tokensRemoved, tokensKept, summary: finalSummary, reason: "summary", + fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined, + toolFailures: failures.length > 0 ? failures : undefined, }; } diff --git a/packages/core/src/agent/events.ts b/packages/core/src/agent/events.ts index 3ae35b64..139048da 100644 --- a/packages/core/src/agent/events.ts +++ b/packages/core/src/agent/events.ts @@ -24,6 +24,8 @@ export type CompactionEndEvent = { tokensRemoved?: number | undefined; tokensKept?: number | undefined; reason: "count" | "tokens" | "summary" | "pruning"; + /** Generated summary text (only present when reason is "summary") */ + summary?: string | undefined; }; /** Emitted when an agent encounters an error during execution */ diff --git a/packages/core/src/agent/runner.ts b/packages/core/src/agent/runner.ts index 92ce68dd..dae48925 100644 --- a/packages/core/src/agent/runner.ts +++ b/packages/core/src/agent/runner.ts @@ -271,12 +271,12 @@ export class Agent { ); } - // 确定 compaction 模式 - const compactionMode = options.compactionMode ?? "tokens"; // 默认使用 token 模式 + // Determine compaction mode (default: summary with LLM-based summarization) + const compactionMode = options.compactionMode ?? "summary"; - // 获取 API Key(用于 summary 模式) + // Resolve API key for summary mode (reuse the agent's own key) const summaryApiKey = compactionMode === "summary" - ? resolveApiKey(this.resolvedProvider, options.apiKey) + ? (resolveApiKey(this.resolvedProvider, options.apiKey) ?? this.currentApiKey) : undefined; // Store reserveTokens for pre-flight compaction @@ -292,7 +292,7 @@ export class Agent { reserveTokens: options.reserveTokens, targetRatio: options.compactionTargetRatio, minKeepMessages: options.minKeepMessages, - // Summary 模式参数 + // Summary mode parameters model: compactionMode === "summary" ? model : undefined, apiKey: summaryApiKey, customInstructions: options.summaryInstructions, @@ -764,6 +764,7 @@ export class Agent { tokensRemoved: result.tokensRemoved, tokensKept: result.tokensKept, reason: result.reason ?? "tokens", + summary: result.summary, }; this.emitMulticaEvent(endEvent); } diff --git a/packages/core/src/agent/session/compaction.ts b/packages/core/src/agent/session/compaction.ts index 18aadb3a..9dfaa40c 100644 --- a/packages/core/src/agent/session/compaction.ts +++ b/packages/core/src/agent/session/compaction.ts @@ -19,6 +19,10 @@ export type CompactionResult = { tokensKept?: number | undefined; /** Summary generated in summary mode */ summary?: string | undefined; + /** File operations extracted from compacted messages */ + fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined; + /** Tool failures extracted from compacted messages */ + toolFailures?: Array<{ toolName: string; summary: string }> | undefined; /** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */ reason: "count" | "tokens" | "summary" | "pruning"; }; @@ -249,6 +253,8 @@ export async function compactMessagesAsync( tokensRemoved: result.tokensRemoved, tokensKept: result.tokensKept, summary: result.summary, + fileOperations: result.fileOperations, + toolFailures: result.toolFailures, reason: "summary", }; } diff --git a/packages/core/src/agent/session/session-manager.ts b/packages/core/src/agent/session/session-manager.ts index df494d61..05f229e3 100644 --- a/packages/core/src/agent/session/session-manager.ts +++ b/packages/core/src/agent/session/session-manager.ts @@ -95,8 +95,8 @@ export class SessionManager { this.sessionId = options.sessionId; this.baseDir = options.baseDir; - // Compaction mode - this.compactionMode = options.compactionMode ?? "count"; + // Compaction mode (default: summary with LLM-based summarization) + this.compactionMode = options.compactionMode ?? "summary"; // Count mode parameters this.maxMessages = options.maxMessages ?? 80; @@ -312,22 +312,37 @@ export class SessionManager { minKeepMessages: this.minKeepMessages, }); } else { - result = await compactMessagesAsync(workingMessages, { - mode: "summary", - model, - apiKey, - contextWindowTokens: this.contextWindowTokens, - systemPrompt: this.systemPrompt, - reserveTokens: this.reserveTokens, - targetRatio: this.targetRatio, - minKeepMessages: this.minKeepMessages, - customInstructions: this.customInstructions, - previousSummary: this.previousSummary, - }); + try { + result = await compactMessagesAsync(workingMessages, { + mode: "summary", + model, + apiKey, + contextWindowTokens: this.contextWindowTokens, + systemPrompt: this.systemPrompt, + reserveTokens: this.reserveTokens, + targetRatio: this.targetRatio, + minKeepMessages: this.minKeepMessages, + customInstructions: this.customInstructions, + previousSummary: this.previousSummary, + }); - // Save summary for next incremental update - if (result?.summary) { - this.previousSummary = result.summary; + // Save summary for next incremental update + if (result?.summary) { + this.previousSummary = result.summary; + } + } catch (err) { + // Summary compaction failed entirely — fall back to tokens mode + console.error( + `[SessionManager] Summary compaction failed, falling back to tokens mode: ${err}`, + ); + result = compactMessages(workingMessages, { + mode: "tokens", + contextWindowTokens: this.contextWindowTokens, + systemPrompt: this.systemPrompt, + reserveTokens: this.reserveTokens, + targetRatio: this.targetRatio, + minKeepMessages: this.minKeepMessages, + }); } } } else { diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts index cc719f4d..8d7ee999 100644 --- a/packages/core/src/agent/types.ts +++ b/packages/core/src/agent/types.ts @@ -51,8 +51,8 @@ export type AgentOptions = { /** * Compaction mode: * - "count": uses legacy message count - * - "tokens": uses token awareness (default) - * - "summary": uses LLM to generate summary + * - "tokens": uses token awareness + * - "summary": uses LLM to generate summary (default) */ compactionMode?: "count" | "tokens" | "summary" | undefined; /** Compaction target utilization ratio (0-1), defaults to 0.5 */ From d7e85d0c2566bb7afbdecc81558a36453b1505e6 Mon Sep 17 00:00:00 2001 From: yushen Date: Fri, 13 Feb 2026 13:33:31 +0800 Subject: [PATCH 3/3] fix(compaction): prevent double-counting in split-turn detection - detectSplitTurn now returns adjustedToSummarize (truncated at assistantIndex) so split prefix messages are not summarized twice and removedCount is not inflated - Remove unused contextWindow parameter from computeAdaptiveChunkRatio - Remove duplicated single-chunk fast path in compactMessagesWithChunkedSummary (the multi-chunk loop handles 1 chunk naturally) - Fix triple-newline formatting in buildPlainTextFallback by concatenating metadata sections directly instead of joining through parts array Co-Authored-By: Claude Opus 4.6 --- .../src/agent/context-window/summarization.ts | 122 ++++++------------ .../agent/context-window/summary-fallback.ts | 12 +- 2 files changed, 48 insertions(+), 86 deletions(-) diff --git a/packages/core/src/agent/context-window/summarization.ts b/packages/core/src/agent/context-window/summarization.ts index c859bcaf..1ff99c11 100644 --- a/packages/core/src/agent/context-window/summarization.ts +++ b/packages/core/src/agent/context-window/summarization.ts @@ -133,17 +133,20 @@ export function splitMessagesForSummary( * Detect and fix a "split turn" — when the first kept message is a user message * containing tool_result blocks without the corresponding assistant tool_use. * - * When detected, moves the orphaned tool_result (and its preceding assistant - * tool_use) from toKeep back to toSummarize so they can be summarized together. + * When detected, separates the orphaned turn (assistant tool_use + user tool_result) + * into a `splitPrefix` for separate summarization, and returns adjusted arrays + * where `toSummarize` no longer contains those messages. * - * Returns { splitPrefix } containing the separated turn prefix messages, - * or null if no split turn was detected. The caller should summarize the - * prefix separately and prepend it. + * Returns null if no split turn was detected. */ export function detectSplitTurn( toSummarize: AgentMessage[], toKeep: AgentMessage[], -): { splitPrefix: AgentMessage[]; adjustedToKeep: AgentMessage[] } | null { +): { + splitPrefix: AgentMessage[]; + adjustedToSummarize: AgentMessage[]; + adjustedToKeep: AgentMessage[]; +} | null { if (toKeep.length === 0) return null; const firstKept = toKeep[0]!; @@ -157,7 +160,6 @@ export function detectSplitTurn( if (!hasToolResult) return null; // This is an orphaned tool_result — look back in toSummarize for the assistant tool_use - // Find the last assistant message in toSummarize that has matching tool_use const toolResultIds = new Set( content .filter((b: any) => b.type === "tool_result") @@ -185,14 +187,16 @@ export function detectSplitTurn( if (assistantIndex < 0) return null; - // Separate the split prefix: assistant tool_use + orphaned user tool_result + // Split prefix: messages from assistantIndex to end of toSummarize + orphaned firstKept const splitPrefix = [ ...toSummarize.slice(assistantIndex), firstKept, ]; + // Truncate toSummarize so the split prefix messages are NOT double-counted + const adjustedToSummarize = toSummarize.slice(0, assistantIndex); const adjustedToKeep = toKeep.slice(1); - return { splitPrefix, adjustedToKeep }; + return { splitPrefix, adjustedToSummarize, adjustedToKeep }; } // ── Adaptive Chunk Ratio ─────────────────────────────────────────────────── @@ -201,13 +205,12 @@ const ADAPTIVE_CHUNK_MIN = 0.15; const ADAPTIVE_CHUNK_MAX = 0.4; /** - * Compute adaptive chunk size based on average message token count. - * Larger average messages → smaller chunks (to avoid exceeding limits). - * Range: [0.15, 0.4] × contextWindow + * Compute adaptive chunk ratio based on average message token count. + * Larger average messages → smaller ratio (to avoid exceeding limits). + * Return value range: [0.15, 0.4] — multiply by availableTokens to get chunk size. */ export function computeAdaptiveChunkRatio( messages: AgentMessage[], - contextWindow: number, ): number { if (messages.length === 0) return ADAPTIVE_CHUNK_MAX; @@ -273,6 +276,7 @@ export async function compactMessagesWithSummary( let splitPrefixSummary = ""; if (splitTurn) { + toSummarize = splitTurn.adjustedToSummarize; toKeep = splitTurn.adjustedToKeep; // Summarize the split prefix separately @@ -290,26 +294,30 @@ export async function compactMessagesWithSummary( splitPrefixSummary = prefixResult.summary; } - // Generate summary with fallback + // Generate summary with fallback (toSummarize no longer contains split prefix messages) const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; - const { summary } = await summarizeWithFallback({ - messages: toSummarize, - model, - reserveTokens, - apiKey, - signal, - instructions, - previousSummary, - availableTokens, - }); + let finalSummary = ""; - // Append split prefix summary if present - let finalSummary = summary; - if (splitPrefixSummary) { - finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`; + if (toSummarize.length > 0) { + const { summary } = await summarizeWithFallback({ + messages: toSummarize, + model, + reserveTokens, + apiKey, + signal, + instructions, + previousSummary, + availableTokens, + }); + finalSummary = summary; } - // Append metadata sections + // Append split prefix summary if present + if (splitPrefixSummary) { + finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`; + } + + // Append metadata sections (all compacted = adjusted toSummarize + splitPrefix) const allCompactedMessages = splitTurn ? [...toSummarize, ...splitTurn.splitPrefix] : toSummarize; @@ -381,6 +389,7 @@ export async function compactMessagesWithChunkedSummary( let splitPrefixSummary = ""; if (splitTurn) { + toSummarize = splitTurn.adjustedToSummarize; toKeep = splitTurn.adjustedToKeep; // Summarize the split prefix separately @@ -399,57 +408,10 @@ export async function compactMessagesWithChunkedSummary( } // Compute adaptive chunk size - const chunkRatio = computeAdaptiveChunkRatio(toSummarize, availableTokens); + const chunkRatio = computeAdaptiveChunkRatio(toSummarize); const maxChunkTokens = params.maxChunkTokens ?? Math.floor(availableTokens * chunkRatio); - // If messages to summarize fit in one chunk, delegate to single-pass compaction - const toSummarizeTokens = estimateMessagesTokens(toSummarize); - if (toSummarizeTokens <= maxChunkTokens) { - // For single-chunk, use the non-chunked path but still handle split turn - const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; - const { summary } = await summarizeWithFallback({ - messages: toSummarize, - model, - reserveTokens, - apiKey, - signal, - instructions, - previousSummary, - availableTokens, - }); - - let finalSummary = summary; - if (splitPrefixSummary) { - finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`; - } - - // Append metadata - const allCompactedMessages = splitTurn - ? [...toSummarize, ...splitTurn.splitPrefix] - : toSummarize; - const failures = collectToolFailures(allCompactedMessages); - const fileOps = collectFileOperations(allCompactedMessages); - finalSummary += formatToolFailuresSection(failures); - finalSummary += formatFileOperationsSection(fileOps); - - const summaryMessage = createSummaryMessage(finalSummary); - const kept = [summaryMessage, ...toKeep]; - const tokensRemoved = estimateMessagesTokens(allCompactedMessages); - const tokensKept = estimateMessagesTokens(kept); - - return { - kept, - removedCount: allCompactedMessages.length, - tokensRemoved, - tokensKept, - summary: finalSummary, - reason: "summary", - fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined, - toolFailures: failures.length > 0 ? failures : undefined, - }; - } - - // Process in chunks + // Process in chunks (works naturally for single-chunk case too) const chunks: AgentMessage[][] = []; let currentChunk: AgentMessage[] = []; let currentTokens = 0; @@ -496,10 +458,10 @@ export async function compactMessagesWithChunkedSummary( // Append split prefix summary if present if (splitPrefixSummary) { - finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`; + finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`; } - // Append metadata sections + // Append metadata sections (all compacted = adjusted toSummarize + splitPrefix) const allCompactedMessages = splitTurn ? [...toSummarize, ...splitTurn.splitPrefix] : toSummarize; diff --git a/packages/core/src/agent/context-window/summary-fallback.ts b/packages/core/src/agent/context-window/summary-fallback.ts index 485c2eb1..da11013c 100644 --- a/packages/core/src/agent/context-window/summary-fallback.ts +++ b/packages/core/src/agent/context-window/summary-fallback.ts @@ -115,14 +115,14 @@ function buildPlainTextFallback( `Below is automatically extracted metadata from the removed messages.`, ); - // Extract and append metadata + // Extract and append metadata (format functions return strings with leading \n, + // designed for direct concatenation — so we concatenate rather than join) const failures = collectToolFailures(messages); const fileOps = collectFileOperations(messages); - const failureSection = formatToolFailuresSection(failures); - const fileOpsSection = formatFileOperationsSection(fileOps); - if (failureSection) parts.push(failureSection); - if (fileOpsSection) parts.push(fileOpsSection); + let result = parts.join("\n\n"); + result += formatToolFailuresSection(failures); + result += formatFileOperationsSection(fileOps); - return parts.join("\n\n"); + return result; }