Merge pull request #157 from multica-ai/feat/improve-compaction

feat(compaction): improve summary mode with fallback, split-turn, and metadata
This commit is contained in:
LinYushen 2026-02-13 19:15:38 +08:00 committed by GitHub
commit 882dc8592b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 574 additions and 59 deletions

View file

@ -0,0 +1,156 @@
/**
* Compaction Metadata — extract file operations & tool failures from compacted messages
*
* Appended to summaries so the agent retains awareness of what files were touched
* and which tool invocations failed, even after the original messages are removed.
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
// ── Types ──────────────────────────────────────────────────────────────────
/** One failed tool invocation recovered from the messages being compacted. */
export type ToolFailure = {
/** Name of the tool that failed, or "unknown" when no matching tool_use block was found. */
toolName: string;
/** Error text taken from the failing tool_result block (truncated by the collector). */
summary: string;
};
/** File paths referenced by read/write tool calls in the messages being compacted. */
export type FileOperations = {
/** Paths that were read and not also modified. */
readFiles: string[];
/** Paths that were targeted by a write/edit tool call. */
modifiedFiles: string[];
};
// ── Tool failure extraction ────────────────────────────────────────────────
/** Upper bound on the number of failures reported per compaction. */
const MAX_TOOL_FAILURES = 8;
/** Maximum number of characters kept from a single error text before "..." is appended. */
const ERROR_SUMMARY_MAX_LEN = 240;

/**
 * Flatten the `content` of a failing tool_result block into plain text.
 *
 * - string: returned as-is
 * - array: each entry contributes itself (if a string) or its `text` field,
 *   joined with single spaces; null/undefined entries contribute ""
 * - object: its `text` field if that is a string, otherwise its JSON form
 *   (so the summary never degrades to "[object Object]")
 * - null/undefined: ""
 */
function toolResultErrorText(content: unknown): string {
  if (typeof content === "string") return content;
  if (Array.isArray(content)) {
    return content
      .map((part: any) => (typeof part === "string" ? part : part?.text ?? ""))
      .join(" ");
  }
  if (content == null) return "";
  if (typeof content === "object") {
    const text = (content as { text?: unknown }).text;
    if (typeof text === "string") return text;
    try {
      // JSON.stringify throws on circular structures and may return undefined.
      return JSON.stringify(content) ?? String(content);
    } catch {
      return String(content);
    }
  }
  return String(content);
}

/**
 * Collect tool failures (tool_result blocks with a truthy `is_error`) from messages.
 *
 * Works in two passes: the first maps every assistant tool_use id to its tool
 * name, the second scans user messages for failing tool_result blocks and
 * resolves their tool name through that map ("unknown" when there is no match).
 *
 * Results are deduplicated by tool call id (`tool_use_id`, falling back to `id`);
 * blocks without an id are skipped. Each summary is truncated to
 * ERROR_SUMMARY_MAX_LEN characters plus "...", and collection stops as soon as
 * MAX_TOOL_FAILURES entries have been gathered.
 *
 * Null or non-object entries inside a content array are ignored rather than
 * aborting the whole extraction.
 *
 * @param messages Messages that are about to be compacted.
 * @returns Failures in the order their tool_result blocks appear.
 */
export function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
  // First pass: collect tool_use names keyed by id
  const toolNameById = new Map<string, string>();
  for (const msg of messages) {
    if (msg.role !== "assistant") continue;
    const content = (msg as any).content;
    if (!Array.isArray(content)) continue;
    for (const block of content) {
      if (block?.type === "tool_use" && block.id && block.name) {
        toolNameById.set(block.id, block.name);
      }
    }
  }

  // Second pass: find is_error tool_result blocks
  const seen = new Set<string>();
  const failures: ToolFailure[] = [];
  for (const msg of messages) {
    if (msg.role !== "user") continue;
    const content = (msg as any).content;
    if (!Array.isArray(content)) continue;
    for (const block of content) {
      if (block?.type !== "tool_result" || !block.is_error) continue;

      const toolCallId: string = block.tool_use_id ?? block.id ?? "";
      if (!toolCallId || seen.has(toolCallId)) continue;
      seen.add(toolCallId);

      let summary = toolResultErrorText(block.content);
      if (summary.length > ERROR_SUMMARY_MAX_LEN) {
        summary = summary.slice(0, ERROR_SUMMARY_MAX_LEN) + "...";
      }

      failures.push({ toolName: toolNameById.get(toolCallId) ?? "unknown", summary });
      if (failures.length >= MAX_TOOL_FAILURES) return failures;
    }
  }
  return failures;
}
// ── File operation extraction ──────────────────────────────────────────────
/** Tool names treated as file reads. */
const READ_TOOL_NAMES = new Set(["Read", "read_file"]);
/** Tool names treated as file writes/edits. */
const WRITE_TOOL_NAMES = new Set(["Write", "Edit", "write_file", "file_edit"]);

/**
 * Collect file read/modify operations from assistant tool_use blocks.
 *
 * For every tool_use block whose name is in READ_TOOL_NAMES or
 * WRITE_TOOL_NAMES, the target path is taken from the first of
 * `file_path`, `path`, `filePath`, `filename` that holds a non-empty string.
 * A non-string or empty value under an earlier key no longer hides a valid
 * path under a later one. Null or non-object content entries are ignored.
 *
 * @param messages Messages that are about to be compacted.
 * @returns De-duplicated paths in first-seen order; `readFiles` excludes any
 *          path that also appears in `modifiedFiles`.
 */
export function collectFileOperations(messages: AgentMessage[]): FileOperations {
  const readSet = new Set<string>();
  const modifiedSet = new Set<string>();

  for (const msg of messages) {
    if (msg.role !== "assistant") continue;
    const content = (msg as any).content;
    if (!Array.isArray(content)) continue;

    for (const block of content) {
      if (block?.type !== "tool_use") continue;
      const name: string = block.name ?? "";
      const input: any = block.input ?? {};

      // Extract file path from common parameter names (first usable string wins)
      const candidates: unknown[] = [input.file_path, input.path, input.filePath, input.filename];
      const filePath = candidates.find(
        (candidate): candidate is string => typeof candidate === "string" && candidate.length > 0,
      );
      if (filePath === undefined) continue;

      if (READ_TOOL_NAMES.has(name)) {
        readSet.add(filePath);
      } else if (WRITE_TOOL_NAMES.has(name)) {
        modifiedSet.add(filePath);
      }
    }
  }

  // Remove modified files from readFiles (to avoid duplication)
  for (const modifiedPath of modifiedSet) {
    readSet.delete(modifiedPath);
  }

  return {
    readFiles: [...readSet],
    modifiedFiles: [...modifiedSet],
  };
}
// ── Formatting ─────────────────────────────────────────────────────────────
/**
 * Render tool failures as a markdown section for appending to a summary.
 *
 * The result starts with a newline so it can be concatenated directly onto
 * existing text: a "## Tool Failures" heading followed by one bullet per
 * failure (`- **tool**: summary`).
 *
 * @returns The section text, or "" when `failures` is empty.
 */
export function formatToolFailuresSection(failures: ToolFailure[]): string {
  if (failures.length === 0) {
    return "";
  }

  let section = "\n## Tool Failures";
  for (const { toolName, summary } of failures) {
    section += "\n" + `- **${toolName}**: ${summary}`;
  }
  return section;
}
/**
 * Render file operations as XML-style sections for appending to a summary.
 *
 * Emits a `<read-files>` block and/or a `<modified-files>` block (one path per
 * line), each preceded by a newline so the result can be concatenated directly
 * onto existing text.
 *
 * @returns The section text, or "" when both lists are empty.
 */
export function formatFileOperationsSection(ops: FileOperations): string {
  const { readFiles, modifiedFiles } = ops;
  let rendered = "";

  if (readFiles.length > 0) {
    rendered += "\n" + `<read-files>\n${readFiles.join("\n")}\n</read-files>`;
  }
  if (modifiedFiles.length > 0) {
    rendered += "\n" + `<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`;
  }

  return rendered;
}

View file

@ -41,10 +41,24 @@ export {
export type { SummaryCompactionResult, SummaryCompactionParams } from "./summarization.js";
export {
splitMessagesForSummary,
detectSplitTurn,
computeAdaptiveChunkRatio,
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
} from "./summarization.js";
// Summary fallback
export { summarizeWithFallback } from "./summary-fallback.js";
// Compaction metadata
export {
collectToolFailures,
collectFileOperations,
formatToolFailuresSection,
formatFileOperationsSection,
} from "./compaction-metadata.js";
export type { ToolFailure, FileOperations } from "./compaction-metadata.js";
// Tool result pruning
export type {
ToolResultPruningSettings,

View file

@ -1,13 +1,22 @@
/**
* Summary-based Compaction
*
* Uses LLM to generate summaries of historical messages instead of simple truncation
* Uses LLM to generate summaries of historical messages instead of simple truncation.
* Includes split-turn detection, adaptive chunk sizing, multi-level fallback,
* and metadata extraction (file operations + tool failures).
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
import { estimateTokens } from "@mariozechner/pi-coding-agent";
import type { Model } from "@mariozechner/pi-ai";
import { estimateMessagesTokens } from "./token-estimation.js";
import { summarizeWithFallback } from "./summary-fallback.js";
import {
collectToolFailures,
collectFileOperations,
formatToolFailuresSection,
formatFileOperationsSection,
} from "./compaction-metadata.js";
/** Summary compaction result */
export type SummaryCompactionResult = {
@ -23,6 +32,10 @@ export type SummaryCompactionResult = {
summary: string;
/** Compaction reason */
reason: "summary";
/** File operations extracted from compacted messages */
fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined;
/** Tool failures extracted from compacted messages */
toolFailures?: Array<{ toolName: string; summary: string }> | undefined;
};
/** Summary compaction parameters */
@ -49,7 +62,7 @@ export type SummaryCompactionParams = {
signal?: AbortSignal | undefined;
};
/** 默认摘要提示词 */
/** Default summary instructions */
const DEFAULT_SUMMARY_INSTRUCTIONS = `Summarize the conversation history concisely, focusing on:
- Key decisions made
- Important context and constraints
@ -114,6 +127,102 @@ export function splitMessagesForSummary(
return { toSummarize, toKeep };
}
// ── Split Turn Detection ───────────────────────────────────────────────────
/**
 * Detect a "split turn": the first kept message is a user message carrying
 * tool_result blocks whose originating assistant tool_use ended up on the
 * to-be-summarized side of the cut.
 *
 * When that happens, the turn is pulled out as `splitPrefix`: everything in
 * `toSummarize` from the matching assistant message onwards, plus the orphaned
 * first kept message. The returned `adjustedToSummarize` stops just before that
 * assistant message and `adjustedToKeep` drops the orphaned message, so no
 * message appears in more than one of the three arrays.
 *
 * The matching assistant message is the LAST one in `toSummarize` that holds a
 * tool_use whose id equals one of the tool_result ids (`tool_use_id`, falling
 * back to `id`).
 *
 * @returns The three adjusted arrays, or null when `toKeep` is empty, its first
 *          message is not a user message with array content containing a
 *          tool_result, or no matching assistant tool_use exists in `toSummarize`.
 */
export function detectSplitTurn(
  toSummarize: AgentMessage[],
  toKeep: AgentMessage[],
): {
  splitPrefix: AgentMessage[];
  adjustedToSummarize: AgentMessage[];
  adjustedToKeep: AgentMessage[];
} | null {
  if (toKeep.length === 0) return null;

  const firstKept = toKeep[0]!;
  if (firstKept.role !== "user") return null;

  const keptBlocks = (firstKept as any).content;
  if (!Array.isArray(keptBlocks)) return null;

  // Gather the ids of every tool_result carried by the first kept message.
  let sawToolResult = false;
  const pendingIds = new Set<unknown>();
  for (const block of keptBlocks) {
    if (block.type !== "tool_result") continue;
    sawToolResult = true;
    const id = block.tool_use_id ?? block.id;
    if (id) pendingIds.add(id);
  }
  if (!sawToolResult) return null;

  // True for an assistant message that issued one of the pending tool calls.
  const issuedPendingCall = (msg: AgentMessage): boolean => {
    if (msg.role !== "assistant") return false;
    const blocks = (msg as any).content;
    if (!Array.isArray(blocks)) return false;
    return blocks.some((b: any) => b.type === "tool_use" && pendingIds.has(b.id));
  };

  // Scan from the end so the most recent matching assistant message wins.
  let anchor = toSummarize.length - 1;
  while (anchor >= 0 && !issuedPendingCall(toSummarize[anchor]!)) {
    anchor--;
  }
  if (anchor < 0) return null;

  return {
    splitPrefix: toSummarize.slice(anchor).concat([firstKept]),
    adjustedToSummarize: toSummarize.slice(0, anchor),
    adjustedToKeep: toKeep.slice(1),
  };
}
// ── Adaptive Chunk Ratio ───────────────────────────────────────────────────
/** Smallest chunk ratio ever returned. */
const ADAPTIVE_CHUNK_MIN = 0.15;
/** Largest chunk ratio ever returned (also the result for an empty message list). */
const ADAPTIVE_CHUNK_MAX = 0.4;

/**
 * Compute an adaptive chunk ratio from the average estimated token count per message.
 *
 * Larger average messages → smaller ratio (to avoid exceeding limits). The ratio
 * falls linearly from ADAPTIVE_CHUNK_MAX at an average of 0 tokens to
 * ADAPTIVE_CHUNK_MIN at an average of 10000 tokens, and is clamped to that range:
 *   avg ≈ 500 → ≈ 0.39, avg ≈ 5000 → ≈ 0.275, avg ≥ 10000 → 0.15.
 *
 * @returns A value in [0.15, 0.4]; multiply by availableTokens to get a chunk size.
 */
export function computeAdaptiveChunkRatio(
  messages: AgentMessage[],
): number {
  const count = messages.length;
  if (count === 0) {
    return ADAPTIVE_CHUNK_MAX;
  }

  const avgTokens = estimateMessagesTokens(messages) / count;

  // Scale inversely: high avg → low ratio
  const span = ADAPTIVE_CHUNK_MAX - ADAPTIVE_CHUNK_MIN;
  const unclamped = ADAPTIVE_CHUNK_MAX - (avgTokens / 10000) * span;

  return Math.max(ADAPTIVE_CHUNK_MIN, Math.min(ADAPTIVE_CHUNK_MAX, unclamped));
}
/**
* Create summary message
*/
@ -150,7 +259,7 @@ export async function compactMessagesWithSummary(
signal,
} = params;
// 分割消息
// Split messages
const split = splitMessagesForSummary(messages, availableTokens, {
targetRatio,
minKeepMessages,
@ -160,43 +269,90 @@ export async function compactMessagesWithSummary(
return null;
}
const { toSummarize, toKeep } = split;
let { toSummarize, toKeep } = split;
// Generate summary
// Detect and handle split turn
const splitTurn = detectSplitTurn(toSummarize, toKeep);
let splitPrefixSummary = "";
if (splitTurn) {
toSummarize = splitTurn.adjustedToSummarize;
toKeep = splitTurn.adjustedToKeep;
// Summarize the split prefix separately
const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
const prefixResult = await summarizeWithFallback({
messages: splitTurn.splitPrefix,
model,
reserveTokens,
apiKey,
signal,
instructions,
previousSummary,
availableTokens,
});
splitPrefixSummary = prefixResult.summary;
}
// Generate summary with fallback (toSummarize no longer contains split prefix messages)
const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
const summary = await generateSummary(
toSummarize,
model,
reserveTokens,
apiKey,
signal,
instructions,
previousSummary,
);
let finalSummary = "";
if (toSummarize.length > 0) {
const { summary } = await summarizeWithFallback({
messages: toSummarize,
model,
reserveTokens,
apiKey,
signal,
instructions,
previousSummary,
availableTokens,
});
finalSummary = summary;
}
// Append split prefix summary if present
if (splitPrefixSummary) {
finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`;
}
// Append metadata sections (all compacted = adjusted toSummarize + splitPrefix)
const allCompactedMessages = splitTurn
? [...toSummarize, ...splitTurn.splitPrefix]
: toSummarize;
const failures = collectToolFailures(allCompactedMessages);
const fileOps = collectFileOperations(allCompactedMessages);
finalSummary += formatToolFailuresSection(failures);
finalSummary += formatFileOperationsSection(fileOps);
// Create summary message
const summaryMessage = createSummaryMessage(summary, previousSummary);
const summaryMessage = createSummaryMessage(finalSummary, previousSummary);
// Combine results
const kept = [summaryMessage, ...toKeep];
const tokensRemoved = estimateMessagesTokens(toSummarize);
const tokensRemoved = estimateMessagesTokens(allCompactedMessages);
const tokensKept = estimateMessagesTokens(kept);
return {
kept,
removedCount: toSummarize.length,
removedCount: allCompactedMessages.length,
tokensRemoved,
tokensKept,
summary,
summary: finalSummary,
reason: "summary",
fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined,
toolFailures: failures.length > 0 ? failures : undefined,
};
}
/**
* Generate summary in chunks (for very large history)
*
* When history is too large, generate summaries by chunks then merge
* When history is too large, generate summaries by chunks then merge.
* Uses adaptive chunk sizing and multi-level fallback.
*/
export async function compactMessagesWithChunkedSummary(
params: SummaryCompactionParams & {
@ -214,7 +370,6 @@ export async function compactMessagesWithChunkedSummary(
customInstructions,
previousSummary,
signal,
maxChunkTokens = 50000,
} = params;
// Split messages
@ -227,15 +382,36 @@ export async function compactMessagesWithChunkedSummary(
return null;
}
const { toSummarize, toKeep } = split;
let { toSummarize, toKeep } = split;
// If messages to summarize are not many, summarize directly
const toSummarizeTokens = estimateMessagesTokens(toSummarize);
if (toSummarizeTokens <= maxChunkTokens) {
return compactMessagesWithSummary(params);
// Detect and handle split turn
const splitTurn = detectSplitTurn(toSummarize, toKeep);
let splitPrefixSummary = "";
if (splitTurn) {
toSummarize = splitTurn.adjustedToSummarize;
toKeep = splitTurn.adjustedToKeep;
// Summarize the split prefix separately
const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
const prefixResult = await summarizeWithFallback({
messages: splitTurn.splitPrefix,
model,
reserveTokens,
apiKey,
signal,
instructions,
previousSummary,
availableTokens,
});
splitPrefixSummary = prefixResult.summary;
}
// Process in chunks
// Compute adaptive chunk size
const chunkRatio = computeAdaptiveChunkRatio(toSummarize);
const maxChunkTokens = params.maxChunkTokens ?? Math.floor(availableTokens * chunkRatio);
// Process in chunks (works naturally for single-chunk case too)
const chunks: AgentMessage[][] = [];
let currentChunk: AgentMessage[] = [];
let currentTokens = 0;
@ -257,27 +433,42 @@ export async function compactMessagesWithChunkedSummary(
chunks.push(currentChunk);
}
// Generate summary for each chunk
// Generate summary for each chunk with fallback
const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
const chunkSummaries: string[] = [];
let runningContext = previousSummary;
for (const chunk of chunks) {
const chunkSummary = await generateSummary(
chunk,
const { summary: chunkSummary } = await summarizeWithFallback({
messages: chunk,
model,
reserveTokens,
apiKey,
signal,
instructions,
runningContext,
);
previousSummary: runningContext,
availableTokens,
});
chunkSummaries.push(chunkSummary);
runningContext = chunkSummary;
}
// Final summary is the last chunk's summary (already includes previous context)
const finalSummary = chunkSummaries[chunkSummaries.length - 1] ?? "";
let finalSummary = chunkSummaries[chunkSummaries.length - 1] ?? "";
// Append split prefix summary if present
if (splitPrefixSummary) {
finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`;
}
// Append metadata sections (all compacted = adjusted toSummarize + splitPrefix)
const allCompactedMessages = splitTurn
? [...toSummarize, ...splitTurn.splitPrefix]
: toSummarize;
const failures = collectToolFailures(allCompactedMessages);
const fileOps = collectFileOperations(allCompactedMessages);
finalSummary += formatToolFailuresSection(failures);
finalSummary += formatFileOperationsSection(fileOps);
// Create summary message
const summaryMessage = createSummaryMessage(finalSummary);
@ -285,15 +476,17 @@ export async function compactMessagesWithChunkedSummary(
// Combine results
const kept = [summaryMessage, ...toKeep];
const tokensRemoved = estimateMessagesTokens(toSummarize);
const tokensRemoved = estimateMessagesTokens(allCompactedMessages);
const tokensKept = estimateMessagesTokens(kept);
return {
kept,
removedCount: toSummarize.length,
removedCount: allCompactedMessages.length,
tokensRemoved,
tokensKept,
summary: finalSummary,
reason: "summary",
fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined,
toolFailures: failures.length > 0 ? failures : undefined,
};
}

View file

@ -0,0 +1,128 @@
/**
* Summary Fallback — multi-level degradation for summary compaction
*
* Level 1: Full LLM summary via generateSummary()
* Level 2: Exclude oversized messages (> 50% context window), retry summary
* Level 3: Plain-text fallback summary (with metadata: file ops + tool failures)
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
import type { Model } from "@mariozechner/pi-ai";
import {
collectToolFailures,
collectFileOperations,
formatToolFailuresSection,
formatFileOperationsSection,
} from "./compaction-metadata.js";
/** Inputs accepted by summarizeWithFallback(). */
export type SummarizeWithFallbackParams = {
/** Messages to summarize */
messages: AgentMessage[];
/** LLM model (forwarded to generateSummary) */
model: Model<any>;
/** Max tokens reserved for summary output (forwarded to generateSummary) */
reserveTokens: number;
/** API key (forwarded to generateSummary) */
apiKey: string;
/** AbortSignal (forwarded to generateSummary) */
signal?: AbortSignal | undefined;
/** Summary instructions */
instructions: string;
/** Previous summary for incremental context; also echoed in the plain-text fallback */
previousSummary?: string | undefined;
/**
* Available context window tokens (used for oversized-message filtering):
* messages estimated above 50% of this value are dropped for the Level 2 retry.
*/
availableTokens: number;
};
/**
 * Attempt to generate an LLM summary with multi-level fallback.
 *
 * Level 1 summarizes all `messages`. If that fails, Level 2 retries with every
 * message whose estimated size exceeds 50% of `availableTokens` removed (only
 * when that actually removes some but not all messages). If that fails too, or
 * is not applicable, Level 3 returns a plain-text note built without the LLM.
 *
 * Cancellation is not treated as a summarization failure: when `signal` is
 * aborted at the time an attempt fails, the error is rethrown instead of
 * falling through to the next level. Otherwise an aborted run would issue a
 * second request with the same aborted signal and then hand back a degraded
 * Level 3 summary as if it were a normal result.
 *
 * @returns `{ summary, level }` where level indicates which tier succeeded:
 *          1 = full summary, 2 = filtered summary, 3 = plain-text fallback.
 * @throws Whatever generateSummary threw, but only if `signal` is aborted.
 */
export async function summarizeWithFallback(
  params: SummarizeWithFallbackParams,
): Promise<{ summary: string; level: 1 | 2 | 3 }> {
  const {
    messages,
    model,
    reserveTokens,
    apiKey,
    signal,
    instructions,
    previousSummary,
    availableTokens,
  } = params;

  // ── Level 1: Full summary ────────────────────────────────────────────
  try {
    const summary = await generateSummary(
      messages,
      model,
      reserveTokens,
      apiKey,
      signal,
      instructions,
      previousSummary,
    );
    return { summary, level: 1 };
  } catch (err) {
    // Propagate cancellation; only genuine failures degrade to the next level.
    if (signal?.aborted) throw err;
    console.warn(`[summary-fallback] Level 1 (full summary) failed: ${err}`);
  }

  // ── Level 2: Exclude oversized messages, retry ───────────────────────
  const oversizeThreshold = availableTokens * 0.5;
  const filtered = messages.filter((msg) => estimateTokens(msg) <= oversizeThreshold);
  // Retrying is pointless when nothing was removed (same input as Level 1)
  // or when everything was removed (nothing left to summarize).
  if (filtered.length > 0 && filtered.length < messages.length) {
    try {
      const summary = await generateSummary(
        filtered,
        model,
        reserveTokens,
        apiKey,
        signal,
        instructions,
        previousSummary,
      );
      return { summary, level: 2 };
    } catch (err) {
      if (signal?.aborted) throw err;
      console.warn(`[summary-fallback] Level 2 (filtered summary) failed: ${err}`);
    }
  }

  // ── Level 3: Plain-text fallback with metadata ───────────────────────
  const summary = buildPlainTextFallback(messages, previousSummary);
  return { summary, level: 3 };
}
/**
 * Build the Level 3 summary: a plain-text note assembled purely from
 * metadata extraction (no LLM call).
 *
 * Layout: an optional "## Previous Context" section holding `previousSummary`,
 * a "## Compaction Note" stating how many messages were compacted, then the
 * tool-failure and file-operation sections when they are non-empty.
 */
function buildPlainTextFallback(
  messages: AgentMessage[],
  previousSummary?: string,
): string {
  const note =
    `## Compaction Note\nLLM summarization was unavailable. ${messages.length} messages were compacted. ` +
    `Below is automatically extracted metadata from the removed messages.`;

  const head = previousSummary
    ? `## Previous Context\n${previousSummary}` + "\n\n" + note
    : note;

  const failures = collectToolFailures(messages);
  const fileOps = collectFileOperations(messages);

  // The format helpers return "" or a string that already starts with "\n",
  // so they are appended directly instead of being joined with a separator.
  return head + formatToolFailuresSection(failures) + formatFileOperationsSection(fileOps);
}

View file

@ -24,6 +24,8 @@ export type CompactionEndEvent = {
tokensRemoved?: number | undefined;
tokensKept?: number | undefined;
reason: "count" | "tokens" | "summary" | "pruning";
/** Generated summary text (only present when reason is "summary") */
summary?: string | undefined;
};
/** Emitted when an agent encounters an error during execution */

View file

@ -271,12 +271,12 @@ export class Agent {
);
}
// 确定 compaction 模式
const compactionMode = options.compactionMode ?? "tokens"; // 默认使用 token 模式
// Determine compaction mode (default: summary with LLM-based summarization)
const compactionMode = options.compactionMode ?? "summary";
// 获取 API Key用于 summary 模式)
// Resolve API key for summary mode (reuse the agent's own key)
const summaryApiKey = compactionMode === "summary"
? resolveApiKey(this.resolvedProvider, options.apiKey)
? (resolveApiKey(this.resolvedProvider, options.apiKey) ?? this.currentApiKey)
: undefined;
// Store reserveTokens for pre-flight compaction
@ -292,7 +292,7 @@ export class Agent {
reserveTokens: options.reserveTokens,
targetRatio: options.compactionTargetRatio,
minKeepMessages: options.minKeepMessages,
// Summary 模式参数
// Summary mode parameters
model: compactionMode === "summary" ? model : undefined,
apiKey: summaryApiKey,
customInstructions: options.summaryInstructions,
@ -764,6 +764,7 @@ export class Agent {
tokensRemoved: result.tokensRemoved,
tokensKept: result.tokensKept,
reason: result.reason ?? "tokens",
summary: result.summary,
};
this.emitMulticaEvent(endEvent);
}

View file

@ -19,6 +19,10 @@ export type CompactionResult = {
tokensKept?: number | undefined;
/** Summary generated in summary mode */
summary?: string | undefined;
/** File operations extracted from compacted messages */
fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined;
/** Tool failures extracted from compacted messages */
toolFailures?: Array<{ toolName: string; summary: string }> | undefined;
/** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */
reason: "count" | "tokens" | "summary" | "pruning";
};
@ -249,6 +253,8 @@ export async function compactMessagesAsync(
tokensRemoved: result.tokensRemoved,
tokensKept: result.tokensKept,
summary: result.summary,
fileOperations: result.fileOperations,
toolFailures: result.toolFailures,
reason: "summary",
};
}

View file

@ -95,8 +95,8 @@ export class SessionManager {
this.sessionId = options.sessionId;
this.baseDir = options.baseDir;
// Compaction mode
this.compactionMode = options.compactionMode ?? "count";
// Compaction mode (default: summary with LLM-based summarization)
this.compactionMode = options.compactionMode ?? "summary";
// Count mode parameters
this.maxMessages = options.maxMessages ?? 80;
@ -312,22 +312,37 @@ export class SessionManager {
minKeepMessages: this.minKeepMessages,
});
} else {
result = await compactMessagesAsync(workingMessages, {
mode: "summary",
model,
apiKey,
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
customInstructions: this.customInstructions,
previousSummary: this.previousSummary,
});
try {
result = await compactMessagesAsync(workingMessages, {
mode: "summary",
model,
apiKey,
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
customInstructions: this.customInstructions,
previousSummary: this.previousSummary,
});
// Save summary for next incremental update
if (result?.summary) {
this.previousSummary = result.summary;
// Save summary for next incremental update
if (result?.summary) {
this.previousSummary = result.summary;
}
} catch (err) {
// Summary compaction failed entirely — fall back to tokens mode
console.error(
`[SessionManager] Summary compaction failed, falling back to tokens mode: ${err}`,
);
result = compactMessages(workingMessages, {
mode: "tokens",
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
});
}
}
} else {

View file

@ -51,8 +51,8 @@ export type AgentOptions = {
/**
* Compaction mode:
* - "count": uses legacy message count
* - "tokens": uses token awareness (default)
* - "summary": uses LLM to generate summary
* - "tokens": uses token awareness
* - "summary": uses LLM to generate summary (default)
*/
compactionMode?: "count" | "tokens" | "summary" | undefined;
/** Compaction target utilization ratio (0-1), defaults to 0.5 */