Merge pull request #157 from multica-ai/feat/improve-compaction
feat(compaction): improve summary mode with fallback, split-turn, and metadata
This commit is contained in:
commit
882dc8592b
9 changed files with 574 additions and 59 deletions
156
packages/core/src/agent/context-window/compaction-metadata.ts
Normal file
156
packages/core/src/agent/context-window/compaction-metadata.ts
Normal file
|
|
@ -0,0 +1,156 @@
|
|||
/**
|
||||
* Compaction Metadata — extract file operations & tool failures from compacted messages
|
||||
*
|
||||
* Appended to summaries so the agent retains awareness of what files were touched
|
||||
* and which tool invocations failed, even after the original messages are removed.
|
||||
*/
|
||||
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
|
||||
// ── Types ──────────────────────────────────────────────────────────────────
|
||||
|
||||
/** One failed tool invocation recovered from messages that are being compacted. */
export type ToolFailure = {
  /** Name of the tool whose call failed. */
  toolName: string;
  /** Short error text reported for the failed call. */
  summary: string;
};
|
||||
|
||||
/** File paths touched by tool calls inside the compacted messages. */
export type FileOperations = {
  /** Paths that were read. */
  readFiles: string[];
  /** Paths that were written or edited. */
  modifiedFiles: string[];
};
|
||||
|
||||
// ── Tool failure extraction ────────────────────────────────────────────────
|
||||
|
||||
/** Maximum number of failures returned per call. */
const MAX_TOOL_FAILURES = 8;
/** Maximum summary length (UTF-16 code units) before "..." is appended. */
const ERROR_SUMMARY_MAX_LEN = 240;

/**
 * Flatten the `content` of a tool_result block into a single line of text.
 *
 * - string content is used as-is
 * - array content contributes each string entry and each entry's `text` field
 * - other objects are JSON-serialised (instead of becoming "[object Object]")
 *
 * All runs of whitespace (including newlines) are collapsed to one space so the
 * result fits on a single markdown bullet line.
 */
function toolResultContentToText(content: unknown): string {
  let text: string;
  if (typeof content === "string") {
    text = content;
  } else if (Array.isArray(content)) {
    text = content
      .map((part: unknown) => {
        if (typeof part === "string") return part;
        // Optional chaining tolerates null/undefined/primitive entries.
        const partText = (part as { text?: unknown } | null | undefined)?.text;
        return partText == null ? "" : String(partText);
      })
      .join(" ");
  } else if (content == null) {
    text = "";
  } else if (typeof content === "object") {
    try {
      text = JSON.stringify(content) ?? "";
    } catch {
      // e.g. circular structures — fall back to the default string form.
      text = String(content);
    }
  } else {
    text = String(content);
  }
  return text.replace(/\s+/g, " ").trim();
}

/**
 * Collect tool failures (`is_error: true` tool_result blocks) from messages.
 *
 * Tool names are resolved from `tool_use` blocks of assistant messages in the
 * same `messages` array; a failure whose call is not found is reported with
 * the name "unknown". Results are deduplicated by tool call id, kept in
 * message order, and capped at MAX_TOOL_FAILURES.
 *
 * NOTE(review): only `tool_use` / `tool_result` content blocks on
 * assistant / user messages are recognised. If the agent runtime stores tool
 * results in a different shape (for example a dedicated tool-result role),
 * nothing will be collected — confirm against the actual AgentMessage shape.
 *
 * @param messages Messages that are about to be compacted.
 * @returns Up to MAX_TOOL_FAILURES failures, each with a one-line summary of
 *          at most ERROR_SUMMARY_MAX_LEN characters plus a trailing "...".
 */
export function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
  // First pass: remember the tool name for every tool_use id.
  const toolNameById = new Map<string, string>();
  for (const msg of messages) {
    if (msg.role !== "assistant") continue;
    const content: unknown = (msg as any).content;
    if (!Array.isArray(content)) continue;
    for (const block of content) {
      // `?.` skips null / non-object entries instead of throwing.
      if (block?.type === "tool_use" && block.id && block.name) {
        toolNameById.set(block.id, block.name);
      }
    }
  }

  // Second pass: pick up failed tool_result blocks.
  const seen = new Set<string>();
  const failures: ToolFailure[] = [];
  for (const msg of messages) {
    if (msg.role !== "user") continue;
    const content: unknown = (msg as any).content;
    if (!Array.isArray(content)) continue;
    for (const block of content) {
      if (block?.type !== "tool_result" || !block.is_error) continue;

      const toolCallId: string = block.tool_use_id ?? block.id ?? "";
      if (!toolCallId || seen.has(toolCallId)) continue;
      seen.add(toolCallId);

      let summary = toolResultContentToText(block.content);
      if (summary.length > ERROR_SUMMARY_MAX_LEN) {
        summary = summary.slice(0, ERROR_SUMMARY_MAX_LEN) + "...";
      }

      failures.push({
        toolName: toolNameById.get(toolCallId) ?? "unknown",
        summary,
      });
      // Stop as soon as the cap is reached; later failures are dropped.
      if (failures.length >= MAX_TOOL_FAILURES) return failures;
    }
  }

  return failures;
}
|
||||
|
||||
// ── File operation extraction ──────────────────────────────────────────────
|
||||
|
||||
/** Tool names treated as file reads (matched case-sensitively). */
const READ_TOOL_NAMES = new Set(["Read", "read_file"]);
/** Tool names treated as file writes/edits (matched case-sensitively). */
const WRITE_TOOL_NAMES = new Set(["Write", "Edit", "write_file", "file_edit"]);
/** Input keys that may carry the target path, in priority order. */
const FILE_PATH_KEYS = ["file_path", "path", "filePath", "filename"] as const;

/**
 * Return the first non-empty string found under one of FILE_PATH_KEYS.
 * Empty or non-string values are skipped so that a later alias can still match.
 */
function firstFilePathArgument(input: unknown): string | undefined {
  if (input === null || typeof input !== "object") return undefined;
  const record = input as Record<string, unknown>;
  for (const key of FILE_PATH_KEYS) {
    const value = record[key];
    if (typeof value === "string" && value.length > 0) return value;
  }
  return undefined;
}

/**
 * Collect file read/modify operations from assistant `tool_use` blocks.
 *
 * A block counts as a read when its `name` is in READ_TOOL_NAMES and as a
 * modification when it is in WRITE_TOOL_NAMES; every other tool is ignored.
 * A path that was both read and modified is reported only in `modifiedFiles`.
 * Paths are returned in first-seen order without duplicates.
 *
 * NOTE(review): tool names are hard-coded and case-sensitive — confirm they
 * match the names of the tools actually registered with the agent.
 *
 * @param messages Messages that are about to be compacted.
 * @returns Read-only paths and modified paths.
 */
export function collectFileOperations(messages: AgentMessage[]): FileOperations {
  const readSet = new Set<string>();
  const modifiedSet = new Set<string>();

  for (const msg of messages) {
    if (msg.role !== "assistant") continue;
    const content: unknown = (msg as any).content;
    if (!Array.isArray(content)) continue;

    for (const block of content) {
      // `?.` skips null / non-object entries instead of throwing.
      if (block?.type !== "tool_use") continue;

      const name: string = block.name ?? "";
      const isRead = READ_TOOL_NAMES.has(name);
      const isWrite = !isRead && WRITE_TOOL_NAMES.has(name);
      if (!isRead && !isWrite) continue;

      const filePath = firstFilePathArgument(block.input);
      if (filePath === undefined) continue;

      (isRead ? readSet : modifiedSet).add(filePath);
    }
  }

  // A modified file is not also listed as read (avoids duplication).
  for (const path of modifiedSet) {
    readSet.delete(path);
  }

  return {
    readFiles: [...readSet],
    modifiedFiles: [...modifiedSet],
  };
}
|
||||
|
||||
// ── Formatting ─────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Render tool failures as a "## Tool Failures" markdown section, one bullet per
 * failure. The result starts with a newline so it can be appended directly to
 * existing text; an empty list yields an empty string.
 *
 * @param failures Failures to render, in output order.
 * @returns The markdown section, or "" when there is nothing to report.
 */
export function formatToolFailuresSection(failures: ToolFailure[]): string {
  if (failures.length === 0) {
    return "";
  }

  let section = "\n## Tool Failures";
  for (const { toolName, summary } of failures) {
    section += "\n" + `- **${toolName}**: ${summary}`;
  }
  return section;
}
|
||||
|
||||
/**
 * Render file operations as `<read-files>` / `<modified-files>` blocks with one
 * path per line. Empty lists are omitted. The result starts with a newline so
 * it can be appended directly to existing text; when both lists are empty the
 * result is an empty string.
 *
 * @param ops Read and modified paths to render.
 * @returns The rendered blocks, or "" when there are no operations.
 */
export function formatFileOperationsSection(ops: FileOperations): string {
  const { readFiles, modifiedFiles } = ops;
  const hasReads = readFiles.length > 0;
  const hasWrites = modifiedFiles.length > 0;

  if (!hasReads && !hasWrites) {
    return "";
  }

  const readBlock = hasReads ? `<read-files>\n${readFiles.join("\n")}\n</read-files>` : "";
  const writeBlock = hasWrites
    ? `<modified-files>\n${modifiedFiles.join("\n")}\n</modified-files>`
    : "";

  // Separate the two blocks with a newline only when both are present.
  const separator = hasReads && hasWrites ? "\n" : "";
  return "\n" + readBlock + separator + writeBlock;
}
|
||||
|
|
@ -41,10 +41,24 @@ export {
|
|||
export type { SummaryCompactionResult, SummaryCompactionParams } from "./summarization.js";
|
||||
export {
|
||||
splitMessagesForSummary,
|
||||
detectSplitTurn,
|
||||
computeAdaptiveChunkRatio,
|
||||
compactMessagesWithSummary,
|
||||
compactMessagesWithChunkedSummary,
|
||||
} from "./summarization.js";
|
||||
|
||||
// Summary fallback
|
||||
export { summarizeWithFallback } from "./summary-fallback.js";
|
||||
|
||||
// Compaction metadata
|
||||
export {
|
||||
collectToolFailures,
|
||||
collectFileOperations,
|
||||
formatToolFailuresSection,
|
||||
formatFileOperationsSection,
|
||||
} from "./compaction-metadata.js";
|
||||
export type { ToolFailure, FileOperations } from "./compaction-metadata.js";
|
||||
|
||||
// Tool result pruning
|
||||
export type {
|
||||
ToolResultPruningSettings,
|
||||
|
|
|
|||
|
|
@ -1,13 +1,22 @@
|
|||
/**
|
||||
* Summary-based Compaction
|
||||
*
|
||||
* Uses LLM to generate summaries of historical messages instead of simple truncation
|
||||
* Uses LLM to generate summaries of historical messages instead of simple truncation.
|
||||
* Includes split-turn detection, adaptive chunk sizing, multi-level fallback,
|
||||
* and metadata extraction (file operations + tool failures).
|
||||
*/
|
||||
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
|
||||
import { estimateTokens } from "@mariozechner/pi-coding-agent";
|
||||
import type { Model } from "@mariozechner/pi-ai";
|
||||
import { estimateMessagesTokens } from "./token-estimation.js";
|
||||
import { summarizeWithFallback } from "./summary-fallback.js";
|
||||
import {
|
||||
collectToolFailures,
|
||||
collectFileOperations,
|
||||
formatToolFailuresSection,
|
||||
formatFileOperationsSection,
|
||||
} from "./compaction-metadata.js";
|
||||
|
||||
/** Summary compaction result */
|
||||
export type SummaryCompactionResult = {
|
||||
|
|
@ -23,6 +32,10 @@ export type SummaryCompactionResult = {
|
|||
summary: string;
|
||||
/** Compaction reason */
|
||||
reason: "summary";
|
||||
/** File operations extracted from compacted messages */
|
||||
fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined;
|
||||
/** Tool failures extracted from compacted messages */
|
||||
toolFailures?: Array<{ toolName: string; summary: string }> | undefined;
|
||||
};
|
||||
|
||||
/** Summary compaction parameters */
|
||||
|
|
@ -49,7 +62,7 @@ export type SummaryCompactionParams = {
|
|||
signal?: AbortSignal | undefined;
|
||||
};
|
||||
|
||||
/** 默认摘要提示词 */
|
||||
/** Default summary instructions */
|
||||
const DEFAULT_SUMMARY_INSTRUCTIONS = `Summarize the conversation history concisely, focusing on:
|
||||
- Key decisions made
|
||||
- Important context and constraints
|
||||
|
|
@ -114,6 +127,102 @@ export function splitMessagesForSummary(
|
|||
return { toSummarize, toKeep };
|
||||
}
|
||||
|
||||
// ── Split Turn Detection ───────────────────────────────────────────────────
|
||||
|
||||
/**
 * Detect a "split turn": the first kept message is a user message carrying
 * tool_result blocks whose matching assistant tool_use ended up in
 * `toSummarize`.
 *
 * When that happens the turn is pulled out as `splitPrefix` — everything from
 * the matching assistant message to the end of `toSummarize`, followed by the
 * orphaned user message — so it can be summarised on its own. The returned
 * `adjustedToSummarize` stops before that assistant message and
 * `adjustedToKeep` drops its first element (it may therefore be empty).
 *
 * @param toSummarize Messages selected for summarisation, oldest first.
 * @param toKeep Messages selected to be kept verbatim, oldest first.
 * @returns The three adjusted arrays, or null when the first kept message is
 *          not a user message with tool_result blocks or no assistant message
 *          in `toSummarize` holds a matching tool_use.
 */
export function detectSplitTurn(
  toSummarize: AgentMessage[],
  toKeep: AgentMessage[],
): {
  splitPrefix: AgentMessage[];
  adjustedToSummarize: AgentMessage[];
  adjustedToKeep: AgentMessage[];
} | null {
  if (toKeep.length === 0) return null;

  const orphanCandidate = toKeep[0]!;
  if (orphanCandidate.role !== "user") return null;

  const blocks = (orphanCandidate as any).content;
  if (!Array.isArray(blocks)) return null;

  // Gather the ids referenced by tool_result blocks in a single pass.
  const pendingIds = new Set<unknown>();
  let sawToolResult = false;
  for (const block of blocks) {
    if (block.type !== "tool_result") continue;
    sawToolResult = true;
    const id = block.tool_use_id ?? block.id;
    if (id) pendingIds.add(id);
  }
  if (!sawToolResult) return null;

  // Scan backwards for the nearest assistant message that issued one of them.
  let cut = toSummarize.length;
  while (--cut >= 0) {
    const candidate = toSummarize[cut]!;
    if (candidate.role !== "assistant") continue;

    const candidateBlocks = (candidate as any).content;
    if (!Array.isArray(candidateBlocks)) continue;

    const issuedPendingCall = candidateBlocks.some(
      (block: any) => block.type === "tool_use" && pendingIds.has(block.id),
    );
    if (issuedPendingCall) break;
  }
  if (cut < 0) return null;

  return {
    // Assistant tool_use .. end of toSummarize, then the orphaned tool_result message.
    splitPrefix: [...toSummarize.slice(cut), orphanCandidate],
    // Exclude the prefix so those messages are not counted twice.
    adjustedToSummarize: toSummarize.slice(0, cut),
    adjustedToKeep: toKeep.slice(1),
  };
}
|
||||
|
||||
// ── Adaptive Chunk Ratio ───────────────────────────────────────────────────
|
||||
|
||||
/** Lower bound of the adaptive chunk ratio. */
const ADAPTIVE_CHUNK_MIN = 0.15;
/** Upper bound of the adaptive chunk ratio (also used for an empty input). */
const ADAPTIVE_CHUNK_MAX = 0.4;

/**
 * Derive a chunk ratio from the average estimated token count per message.
 *
 * The ratio falls linearly from ADAPTIVE_CHUNK_MAX at an average of 0 tokens to
 * ADAPTIVE_CHUNK_MIN at an average of 10000 tokens and is clamped to that
 * range: e.g. ~1000 tokens/message → 0.375, ~5000 → 0.275, ≥ 10000 → 0.15.
 *
 * @param messages Messages that will be chunked.
 * @returns A ratio in [0.15, 0.4]; 0.4 when `messages` is empty.
 */
export function computeAdaptiveChunkRatio(
  messages: AgentMessage[],
): number {
  const count = messages.length;
  if (count === 0) return ADAPTIVE_CHUNK_MAX;

  const avgTokens = estimateMessagesTokens(messages) / count;

  // Larger average message → smaller ratio.
  const unclamped =
    ADAPTIVE_CHUNK_MAX - (avgTokens / 10000) * (ADAPTIVE_CHUNK_MAX - ADAPTIVE_CHUNK_MIN);

  const cappedAbove = Math.min(ADAPTIVE_CHUNK_MAX, unclamped);
  return Math.max(ADAPTIVE_CHUNK_MIN, cappedAbove);
}
|
||||
|
||||
/**
|
||||
* Create summary message
|
||||
*/
|
||||
|
|
@ -150,7 +259,7 @@ export async function compactMessagesWithSummary(
|
|||
signal,
|
||||
} = params;
|
||||
|
||||
// 分割消息
|
||||
// Split messages
|
||||
const split = splitMessagesForSummary(messages, availableTokens, {
|
||||
targetRatio,
|
||||
minKeepMessages,
|
||||
|
|
@ -160,43 +269,90 @@ export async function compactMessagesWithSummary(
|
|||
return null;
|
||||
}
|
||||
|
||||
const { toSummarize, toKeep } = split;
|
||||
let { toSummarize, toKeep } = split;
|
||||
|
||||
// Generate summary
|
||||
// Detect and handle split turn
|
||||
const splitTurn = detectSplitTurn(toSummarize, toKeep);
|
||||
let splitPrefixSummary = "";
|
||||
|
||||
if (splitTurn) {
|
||||
toSummarize = splitTurn.adjustedToSummarize;
|
||||
toKeep = splitTurn.adjustedToKeep;
|
||||
|
||||
// Summarize the split prefix separately
|
||||
const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
|
||||
const prefixResult = await summarizeWithFallback({
|
||||
messages: splitTurn.splitPrefix,
|
||||
model,
|
||||
reserveTokens,
|
||||
apiKey,
|
||||
signal,
|
||||
instructions,
|
||||
previousSummary,
|
||||
availableTokens,
|
||||
});
|
||||
splitPrefixSummary = prefixResult.summary;
|
||||
}
|
||||
|
||||
// Generate summary with fallback (toSummarize no longer contains split prefix messages)
|
||||
const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
|
||||
const summary = await generateSummary(
|
||||
toSummarize,
|
||||
model,
|
||||
reserveTokens,
|
||||
apiKey,
|
||||
signal,
|
||||
instructions,
|
||||
previousSummary,
|
||||
);
|
||||
let finalSummary = "";
|
||||
|
||||
if (toSummarize.length > 0) {
|
||||
const { summary } = await summarizeWithFallback({
|
||||
messages: toSummarize,
|
||||
model,
|
||||
reserveTokens,
|
||||
apiKey,
|
||||
signal,
|
||||
instructions,
|
||||
previousSummary,
|
||||
availableTokens,
|
||||
});
|
||||
finalSummary = summary;
|
||||
}
|
||||
|
||||
// Append split prefix summary if present
|
||||
if (splitPrefixSummary) {
|
||||
finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`;
|
||||
}
|
||||
|
||||
// Append metadata sections (all compacted = adjusted toSummarize + splitPrefix)
|
||||
const allCompactedMessages = splitTurn
|
||||
? [...toSummarize, ...splitTurn.splitPrefix]
|
||||
: toSummarize;
|
||||
const failures = collectToolFailures(allCompactedMessages);
|
||||
const fileOps = collectFileOperations(allCompactedMessages);
|
||||
|
||||
finalSummary += formatToolFailuresSection(failures);
|
||||
finalSummary += formatFileOperationsSection(fileOps);
|
||||
|
||||
// Create summary message
|
||||
const summaryMessage = createSummaryMessage(summary, previousSummary);
|
||||
const summaryMessage = createSummaryMessage(finalSummary, previousSummary);
|
||||
|
||||
// Combine results
|
||||
const kept = [summaryMessage, ...toKeep];
|
||||
|
||||
const tokensRemoved = estimateMessagesTokens(toSummarize);
|
||||
const tokensRemoved = estimateMessagesTokens(allCompactedMessages);
|
||||
const tokensKept = estimateMessagesTokens(kept);
|
||||
|
||||
return {
|
||||
kept,
|
||||
removedCount: toSummarize.length,
|
||||
removedCount: allCompactedMessages.length,
|
||||
tokensRemoved,
|
||||
tokensKept,
|
||||
summary,
|
||||
summary: finalSummary,
|
||||
reason: "summary",
|
||||
fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined,
|
||||
toolFailures: failures.length > 0 ? failures : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Generate summary in chunks (for very large history)
|
||||
*
|
||||
* When history is too large, generate summaries by chunks then merge
|
||||
* When history is too large, generate summaries by chunks then merge.
|
||||
* Uses adaptive chunk sizing and multi-level fallback.
|
||||
*/
|
||||
export async function compactMessagesWithChunkedSummary(
|
||||
params: SummaryCompactionParams & {
|
||||
|
|
@ -214,7 +370,6 @@ export async function compactMessagesWithChunkedSummary(
|
|||
customInstructions,
|
||||
previousSummary,
|
||||
signal,
|
||||
maxChunkTokens = 50000,
|
||||
} = params;
|
||||
|
||||
// Split messages
|
||||
|
|
@ -227,15 +382,36 @@ export async function compactMessagesWithChunkedSummary(
|
|||
return null;
|
||||
}
|
||||
|
||||
const { toSummarize, toKeep } = split;
|
||||
let { toSummarize, toKeep } = split;
|
||||
|
||||
// If messages to summarize are not many, summarize directly
|
||||
const toSummarizeTokens = estimateMessagesTokens(toSummarize);
|
||||
if (toSummarizeTokens <= maxChunkTokens) {
|
||||
return compactMessagesWithSummary(params);
|
||||
// Detect and handle split turn
|
||||
const splitTurn = detectSplitTurn(toSummarize, toKeep);
|
||||
let splitPrefixSummary = "";
|
||||
|
||||
if (splitTurn) {
|
||||
toSummarize = splitTurn.adjustedToSummarize;
|
||||
toKeep = splitTurn.adjustedToKeep;
|
||||
|
||||
// Summarize the split prefix separately
|
||||
const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
|
||||
const prefixResult = await summarizeWithFallback({
|
||||
messages: splitTurn.splitPrefix,
|
||||
model,
|
||||
reserveTokens,
|
||||
apiKey,
|
||||
signal,
|
||||
instructions,
|
||||
previousSummary,
|
||||
availableTokens,
|
||||
});
|
||||
splitPrefixSummary = prefixResult.summary;
|
||||
}
|
||||
|
||||
// Process in chunks
|
||||
// Compute adaptive chunk size
|
||||
const chunkRatio = computeAdaptiveChunkRatio(toSummarize);
|
||||
const maxChunkTokens = params.maxChunkTokens ?? Math.floor(availableTokens * chunkRatio);
|
||||
|
||||
// Process in chunks (works naturally for single-chunk case too)
|
||||
const chunks: AgentMessage[][] = [];
|
||||
let currentChunk: AgentMessage[] = [];
|
||||
let currentTokens = 0;
|
||||
|
|
@ -257,27 +433,42 @@ export async function compactMessagesWithChunkedSummary(
|
|||
chunks.push(currentChunk);
|
||||
}
|
||||
|
||||
// Generate summary for each chunk
|
||||
// Generate summary for each chunk with fallback
|
||||
const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
|
||||
const chunkSummaries: string[] = [];
|
||||
|
||||
let runningContext = previousSummary;
|
||||
for (const chunk of chunks) {
|
||||
const chunkSummary = await generateSummary(
|
||||
chunk,
|
||||
const { summary: chunkSummary } = await summarizeWithFallback({
|
||||
messages: chunk,
|
||||
model,
|
||||
reserveTokens,
|
||||
apiKey,
|
||||
signal,
|
||||
instructions,
|
||||
runningContext,
|
||||
);
|
||||
previousSummary: runningContext,
|
||||
availableTokens,
|
||||
});
|
||||
chunkSummaries.push(chunkSummary);
|
||||
runningContext = chunkSummary;
|
||||
}
|
||||
|
||||
// Final summary is the last chunk's summary (already includes previous context)
|
||||
const finalSummary = chunkSummaries[chunkSummaries.length - 1] ?? "";
|
||||
let finalSummary = chunkSummaries[chunkSummaries.length - 1] ?? "";
|
||||
|
||||
// Append split prefix summary if present
|
||||
if (splitPrefixSummary) {
|
||||
finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`;
|
||||
}
|
||||
|
||||
// Append metadata sections (all compacted = adjusted toSummarize + splitPrefix)
|
||||
const allCompactedMessages = splitTurn
|
||||
? [...toSummarize, ...splitTurn.splitPrefix]
|
||||
: toSummarize;
|
||||
const failures = collectToolFailures(allCompactedMessages);
|
||||
const fileOps = collectFileOperations(allCompactedMessages);
|
||||
finalSummary += formatToolFailuresSection(failures);
|
||||
finalSummary += formatFileOperationsSection(fileOps);
|
||||
|
||||
// Create summary message
|
||||
const summaryMessage = createSummaryMessage(finalSummary);
|
||||
|
|
@ -285,15 +476,17 @@ export async function compactMessagesWithChunkedSummary(
|
|||
// Combine results
|
||||
const kept = [summaryMessage, ...toKeep];
|
||||
|
||||
const tokensRemoved = estimateMessagesTokens(toSummarize);
|
||||
const tokensRemoved = estimateMessagesTokens(allCompactedMessages);
|
||||
const tokensKept = estimateMessagesTokens(kept);
|
||||
|
||||
return {
|
||||
kept,
|
||||
removedCount: toSummarize.length,
|
||||
removedCount: allCompactedMessages.length,
|
||||
tokensRemoved,
|
||||
tokensKept,
|
||||
summary: finalSummary,
|
||||
reason: "summary",
|
||||
fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined,
|
||||
toolFailures: failures.length > 0 ? failures : undefined,
|
||||
};
|
||||
}
|
||||
|
|
|
|||
128
packages/core/src/agent/context-window/summary-fallback.ts
Normal file
128
packages/core/src/agent/context-window/summary-fallback.ts
Normal file
|
|
@ -0,0 +1,128 @@
|
|||
/**
|
||||
* Summary Fallback — multi-level degradation for summary compaction
|
||||
*
|
||||
* Level 1: Full LLM summary via generateSummary()
|
||||
* Level 2: Exclude oversized messages (> 50% context window), retry summary
|
||||
* Level 3: Plain-text fallback summary (with metadata: file ops + tool failures)
|
||||
*/
|
||||
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
|
||||
import type { Model } from "@mariozechner/pi-ai";
|
||||
import {
|
||||
collectToolFailures,
|
||||
collectFileOperations,
|
||||
formatToolFailuresSection,
|
||||
formatFileOperationsSection,
|
||||
} from "./compaction-metadata.js";
|
||||
|
||||
/** Inputs accepted by `summarizeWithFallback`. */
export type SummarizeWithFallbackParams = {
  /** Conversation messages that should be condensed into a summary. */
  messages: AgentMessage[];
  /** Model handed to the summary generator. */
  model: Model<any>;
  /** Token budget reserved for the generated summary. */
  reserveTokens: number;
  /** API key handed to the summary generator. */
  apiKey: string;
  /** Optional cancellation signal forwarded to the summary generator. */
  signal?: AbortSignal | undefined;
  /** Instructions describing what the summary should focus on. */
  instructions: string;
  /** Earlier summary, if any, to build on incrementally. */
  previousSummary?: string | undefined;
  /** Context-window tokens available; messages above half of it count as oversized. */
  availableTokens: number;
};
|
||||
|
||||
/**
 * Generate a summary, degrading step by step when the LLM call fails.
 *
 * - Level 1: summarise all `messages` with `generateSummary`.
 * - Level 2: drop messages whose estimated size exceeds 50% of
 *   `availableTokens` and retry. Only attempted when the filter removed some
 *   but not all messages, and skipped when `signal` is already aborted so no
 *   further LLM request is issued after cancellation.
 * - Level 3: build a plain-text summary without calling the LLM.
 *
 * Failures of Level 1 and Level 2 are logged with `console.warn` and never
 * rethrown by this function.
 *
 * @param params See `SummarizeWithFallbackParams`.
 * @returns The summary text and the level (1, 2 or 3) that produced it.
 */
export async function summarizeWithFallback(
  params: SummarizeWithFallbackParams,
): Promise<{ summary: string; level: 1 | 2 | 3 }> {
  const {
    messages,
    model,
    reserveTokens,
    apiKey,
    signal,
    instructions,
    previousSummary,
    availableTokens,
  } = params;

  // Both LLM levels make the same call and differ only in the message batch.
  const attempt = (batch: AgentMessage[]) =>
    generateSummary(
      batch,
      model,
      reserveTokens,
      apiKey,
      signal,
      instructions,
      previousSummary,
    );

  // ── Level 1: Full summary ────────────────────────────────────────────
  try {
    const summary = await attempt(messages);
    return { summary, level: 1 };
  } catch (err) {
    console.warn(`[summary-fallback] Level 1 (full summary) failed: ${err}`);
  }

  // ── Level 2: Exclude oversized messages, retry ───────────────────────
  // An aborted signal means the caller cancelled; a retry with the same signal
  // would only fail again, so go straight to the LLM-free fallback.
  if (!signal?.aborted) {
    const oversizeThreshold = availableTokens * 0.5;
    const filtered = messages.filter((msg) => estimateTokens(msg) <= oversizeThreshold);

    if (filtered.length > 0 && filtered.length < messages.length) {
      try {
        const summary = await attempt(filtered);
        return { summary, level: 2 };
      } catch (err) {
        console.warn(`[summary-fallback] Level 2 (filtered summary) failed: ${err}`);
      }
    }
  }

  // ── Level 3: Plain-text fallback with metadata ───────────────────────
  const summary = buildPlainTextFallback(messages, previousSummary);
  return { summary, level: 3 };
}
|
||||
|
||||
/**
 * Assemble a summary without calling the LLM.
 *
 * The text consists of the previous summary (when provided) under
 * "## Previous Context", a "## Compaction Note" stating how many messages were
 * compacted, and the tool-failure and file-operation sections extracted from
 * `messages`.
 *
 * @param messages Messages being compacted.
 * @param previousSummary Earlier summary to carry over, if any.
 * @returns The assembled plain-text summary.
 */
function buildPlainTextFallback(
  messages: AgentMessage[],
  previousSummary?: string,
): string {
  const note =
    `## Compaction Note\nLLM summarization was unavailable. ${messages.length} messages were compacted. ` +
    `Below is automatically extracted metadata from the removed messages.`;

  const sections = previousSummary
    ? [`## Previous Context\n${previousSummary}`, note]
    : [note];

  const failures = collectToolFailures(messages);
  const fileOps = collectFileOperations(messages);

  // The format helpers return text that already starts with "\n" (or ""), so
  // they are appended directly rather than joined with a separator.
  return (
    sections.join("\n\n") +
    formatToolFailuresSection(failures) +
    formatFileOperationsSection(fileOps)
  );
}
|
||||
|
|
@ -24,6 +24,8 @@ export type CompactionEndEvent = {
|
|||
tokensRemoved?: number | undefined;
|
||||
tokensKept?: number | undefined;
|
||||
reason: "count" | "tokens" | "summary" | "pruning";
|
||||
/** Generated summary text (only present when reason is "summary") */
|
||||
summary?: string | undefined;
|
||||
};
|
||||
|
||||
/** Emitted when an agent encounters an error during execution */
|
||||
|
|
|
|||
|
|
@ -271,12 +271,12 @@ export class Agent {
|
|||
);
|
||||
}
|
||||
|
||||
// 确定 compaction 模式
|
||||
const compactionMode = options.compactionMode ?? "tokens"; // 默认使用 token 模式
|
||||
// Determine compaction mode (default: summary with LLM-based summarization)
|
||||
const compactionMode = options.compactionMode ?? "summary";
|
||||
|
||||
// 获取 API Key(用于 summary 模式)
|
||||
// Resolve API key for summary mode (reuse the agent's own key)
|
||||
const summaryApiKey = compactionMode === "summary"
|
||||
? resolveApiKey(this.resolvedProvider, options.apiKey)
|
||||
? (resolveApiKey(this.resolvedProvider, options.apiKey) ?? this.currentApiKey)
|
||||
: undefined;
|
||||
|
||||
// Store reserveTokens for pre-flight compaction
|
||||
|
|
@ -292,7 +292,7 @@ export class Agent {
|
|||
reserveTokens: options.reserveTokens,
|
||||
targetRatio: options.compactionTargetRatio,
|
||||
minKeepMessages: options.minKeepMessages,
|
||||
// Summary 模式参数
|
||||
// Summary mode parameters
|
||||
model: compactionMode === "summary" ? model : undefined,
|
||||
apiKey: summaryApiKey,
|
||||
customInstructions: options.summaryInstructions,
|
||||
|
|
@ -764,6 +764,7 @@ export class Agent {
|
|||
tokensRemoved: result.tokensRemoved,
|
||||
tokensKept: result.tokensKept,
|
||||
reason: result.reason ?? "tokens",
|
||||
summary: result.summary,
|
||||
};
|
||||
this.emitMulticaEvent(endEvent);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,6 +19,10 @@ export type CompactionResult = {
|
|||
tokensKept?: number | undefined;
|
||||
/** Summary generated in summary mode */
|
||||
summary?: string | undefined;
|
||||
/** File operations extracted from compacted messages */
|
||||
fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined;
|
||||
/** Tool failures extracted from compacted messages */
|
||||
toolFailures?: Array<{ toolName: string; summary: string }> | undefined;
|
||||
/** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */
|
||||
reason: "count" | "tokens" | "summary" | "pruning";
|
||||
};
|
||||
|
|
@ -249,6 +253,8 @@ export async function compactMessagesAsync(
|
|||
tokensRemoved: result.tokensRemoved,
|
||||
tokensKept: result.tokensKept,
|
||||
summary: result.summary,
|
||||
fileOperations: result.fileOperations,
|
||||
toolFailures: result.toolFailures,
|
||||
reason: "summary",
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -95,8 +95,8 @@ export class SessionManager {
|
|||
this.sessionId = options.sessionId;
|
||||
this.baseDir = options.baseDir;
|
||||
|
||||
// Compaction mode
|
||||
this.compactionMode = options.compactionMode ?? "count";
|
||||
// Compaction mode (default: summary with LLM-based summarization)
|
||||
this.compactionMode = options.compactionMode ?? "summary";
|
||||
|
||||
// Count mode parameters
|
||||
this.maxMessages = options.maxMessages ?? 80;
|
||||
|
|
@ -312,22 +312,37 @@ export class SessionManager {
|
|||
minKeepMessages: this.minKeepMessages,
|
||||
});
|
||||
} else {
|
||||
result = await compactMessagesAsync(workingMessages, {
|
||||
mode: "summary",
|
||||
model,
|
||||
apiKey,
|
||||
contextWindowTokens: this.contextWindowTokens,
|
||||
systemPrompt: this.systemPrompt,
|
||||
reserveTokens: this.reserveTokens,
|
||||
targetRatio: this.targetRatio,
|
||||
minKeepMessages: this.minKeepMessages,
|
||||
customInstructions: this.customInstructions,
|
||||
previousSummary: this.previousSummary,
|
||||
});
|
||||
try {
|
||||
result = await compactMessagesAsync(workingMessages, {
|
||||
mode: "summary",
|
||||
model,
|
||||
apiKey,
|
||||
contextWindowTokens: this.contextWindowTokens,
|
||||
systemPrompt: this.systemPrompt,
|
||||
reserveTokens: this.reserveTokens,
|
||||
targetRatio: this.targetRatio,
|
||||
minKeepMessages: this.minKeepMessages,
|
||||
customInstructions: this.customInstructions,
|
||||
previousSummary: this.previousSummary,
|
||||
});
|
||||
|
||||
// Save summary for next incremental update
|
||||
if (result?.summary) {
|
||||
this.previousSummary = result.summary;
|
||||
// Save summary for next incremental update
|
||||
if (result?.summary) {
|
||||
this.previousSummary = result.summary;
|
||||
}
|
||||
} catch (err) {
|
||||
// Summary compaction failed entirely — fall back to tokens mode
|
||||
console.error(
|
||||
`[SessionManager] Summary compaction failed, falling back to tokens mode: ${err}`,
|
||||
);
|
||||
result = compactMessages(workingMessages, {
|
||||
mode: "tokens",
|
||||
contextWindowTokens: this.contextWindowTokens,
|
||||
systemPrompt: this.systemPrompt,
|
||||
reserveTokens: this.reserveTokens,
|
||||
targetRatio: this.targetRatio,
|
||||
minKeepMessages: this.minKeepMessages,
|
||||
});
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -51,8 +51,8 @@ export type AgentOptions = {
|
|||
/**
|
||||
* Compaction mode:
|
||||
* - "count": uses legacy message count
|
||||
* - "tokens": uses token awareness (default)
|
||||
* - "summary": uses LLM to generate summary
|
||||
* - "tokens": uses token awareness
|
||||
* - "summary": uses LLM to generate summary (default)
|
||||
*/
|
||||
compactionMode?: "count" | "tokens" | "summary" | undefined;
|
||||
/** Compaction target utilization ratio (0-1), defaults to 0.5 */
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue