refactor(compaction): remove dead code and legacy count mode

- Remove compactMessagesWithSummary (~100 lines, never called; only
  the Chunked variant was used)
- Remove compactMessagesByCount, findSafeCompactionPoint, and all
  count-mode references (~90 lines)
- Narrow CompactionResult.reason to "tokens" | "summary" | "pruning"
- Narrow compactionMode to "tokens" | "summary" (was "count" | ...)
- Simplify session-manager: remove maxMessages/keepLast params,
  enable tool result pruning by default

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jiayuan Zhang 2026-02-15 21:37:23 +08:00
parent 92cf312843
commit b412ca902b
6 changed files with 26 additions and 376 deletions

View file

@ -43,7 +43,6 @@ export {
splitMessagesForSummary,
detectSplitTurn,
computeAdaptiveChunkRatio,
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
} from "./summarization.js";

View file

@ -238,116 +238,6 @@ function createSummaryMessage(summary: string, previousSummary?: string): AgentM
};
}
/**
 * Summary-based compaction.
 *
 * Splits the history into a portion to summarize and a portion to keep, has the
 * LLM summarize the former, and returns the summary message followed by the
 * kept messages. When the split falls inside a turn (as reported by
 * detectSplitTurn), the split prefix is summarized on its own and appended
 * under a "Split Turn Context" heading. Tool-failure and file-operation
 * sections are appended to the summary text last.
 *
 * @param params - Messages plus model, credentials, token budget and tuning options.
 * @returns The compaction result, or null when splitMessagesForSummary yields no split.
 */
export async function compactMessagesWithSummary(
  params: SummaryCompactionParams,
): Promise<SummaryCompactionResult | null> {
  const {
    messages,
    model,
    apiKey,
    availableTokens,
    targetRatio,
    minKeepMessages,
    reserveTokens = 2048,
    customInstructions,
    previousSummary,
    signal,
  } = params;

  const partition = splitMessagesForSummary(messages, availableTokens, {
    targetRatio,
    minKeepMessages,
  });
  if (!partition) {
    return null;
  }

  // A split that lands mid-turn moves the boundary; use the adjusted halves in that case.
  const boundary = detectSplitTurn(partition.toSummarize, partition.toKeep);
  const toSummarize = boundary ? boundary.adjustedToSummarize : partition.toSummarize;
  const toKeep = boundary ? boundary.adjustedToKeep : partition.toKeep;

  // Everything except `messages` is identical for both summarization requests.
  const sharedRequest = {
    model,
    reserveTokens,
    apiKey,
    signal,
    instructions: customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS,
    previousSummary,
    availableTokens,
  };

  // The split prefix is summarized first, separately from the main history.
  let prefixSummary = "";
  if (boundary) {
    const prefixOutcome = await summarizeWithFallback({
      ...sharedRequest,
      messages: boundary.splitPrefix,
    });
    prefixSummary = prefixOutcome.summary;
  }

  // Main summary (toSummarize no longer contains the split prefix messages).
  let summaryBody = "";
  if (toSummarize.length > 0) {
    const mainOutcome = await summarizeWithFallback({
      ...sharedRequest,
      messages: toSummarize,
    });
    summaryBody = mainOutcome.summary;
  }

  if (prefixSummary) {
    const separator = summaryBody ? "\n\n" : "";
    summaryBody = `${summaryBody}${separator}## Split Turn Context\n${prefixSummary}`;
  }

  // Metadata covers every compacted message: adjusted toSummarize plus the split prefix.
  const compacted = boundary ? [...toSummarize, ...boundary.splitPrefix] : toSummarize;
  const failures = collectToolFailures(compacted);
  const fileOps = collectFileOperations(compacted);
  const failureSection = formatToolFailuresSection(failures);
  const fileSection = formatFileOperationsSection(fileOps);
  const finalSummary = summaryBody + failureSection + fileSection;

  const kept = [createSummaryMessage(finalSummary, previousSummary), ...toKeep];
  const touchedFiles = fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0;

  return {
    kept,
    removedCount: compacted.length,
    tokensRemoved: estimateMessagesTokens(compacted),
    tokensKept: estimateMessagesTokens(kept),
    summary: finalSummary,
    reason: "summary",
    fileOperations: touchedFiles ? fileOps : undefined,
    toolFailures: failures.length > 0 ? failures : undefined,
  };
}
/**
* Generate summary in chunks (for very large history)
*

View file

@ -1,6 +1,5 @@
import { describe, it, expect, vi } from "vitest";
import {
compactMessagesByCount,
compactMessagesByTokens,
compactMessages,
type CompactionResult,
@ -27,96 +26,6 @@ describe("compaction", () => {
})) as AgentMessage[];
}
/**
 * Fixture: a five-message conversation containing one tool_use / tool_result
 * exchange (id "tool-1") between plain user and assistant messages.
 */
function createMessagesWithToolUse(): AgentMessage[] {
  const toolCallId = "tool-1";

  // Assistant turn that issues the tool call.
  const toolCall = {
    role: "assistant",
    content: [{ type: "tool_use", id: toolCallId, name: "test", input: {} }],
  } as any;

  // User turn that carries the matching tool result.
  const toolOutcome = {
    role: "user",
    content: [{ type: "tool_result", tool_use_id: toolCallId, content: "Result" }],
  } as any;

  return [
    { role: "user", content: "Start" },
    toolCall,
    toolOutcome,
    { role: "assistant", content: "Done" },
    { role: "user", content: "Next message" },
  ];
}
// Tests for the count-based compaction path: thresholds, keepLast handling,
// and preservation of tool_use/tool_result pairs at the cut point.
describe("compactMessagesByCount", () => {
  it("should return null when under max messages", () => {
    const messages = createMessages(50);
    const result = compactMessagesByCount(messages, 80, 60);
    expect(result).toBeNull();
  });

  it("should compact when over max messages", () => {
    const messages = createMessages(100);
    const result = compactMessagesByCount(messages, 80, 60);
    expect(result).not.toBeNull();
    expect(result!.reason).toBe("count");
    expect(result!.kept.length).toBeLessThanOrEqual(100);
    expect(result!.removedCount).toBeGreaterThan(0);
  });

  it("should keep the specified number of last messages", () => {
    const messages = createMessages(100);
    const result = compactMessagesByCount(messages, 80, 50);
    if (result) {
      // Should keep approximately keepLast messages
      expect(result.kept.length).toBeGreaterThanOrEqual(40);
      expect(result.kept.length).toBeLessThanOrEqual(60);
    }
  });

  it("should return null when exact at max messages", () => {
    const messages = createMessages(80);
    const result = compactMessagesByCount(messages, 80, 60);
    expect(result).toBeNull();
  });

  it("should not break tool_use/tool_result pairs", () => {
    // 80 plain messages plus the 5-message tool exchange exceeds maxMessages (80),
    // so compaction actually runs. keepLast = 3 puts the naive cut on the
    // tool_result message; keepLast = 4 puts it on the assistant tool_use message.
    const messages = [...createMessages(80), ...createMessagesWithToolUse()];

    for (const keepLast of [3, 4]) {
      const result = compactMessagesByCount(messages, 80, keepLast);
      expect(result).not.toBeNull();

      const kept = result!.kept as any[];
      expect(kept.length).toBeGreaterThan(0);

      // Every tool_result must directly follow a kept message holding its tool_use.
      const orphanedIds: string[] = [];
      kept.forEach((msg, index) => {
        if (!Array.isArray(msg.content)) return;
        const previous = index > 0 ? kept[index - 1] : undefined;
        const previousBlocks: any[] =
          previous && Array.isArray(previous.content) ? previous.content : [];
        for (const block of msg.content) {
          if (block.type !== "tool_result") continue;
          const paired = previousBlocks.some(
            (candidate: any) => candidate.type === "tool_use" && candidate.id === block.tool_use_id,
          );
          if (!paired) orphanedIds.push(block.tool_use_id);
        }
      });
      expect(orphanedIds).toEqual([]);
    }
  });

  it("should return null when would keep almost all messages", () => {
    const messages = createMessages(85);
    const result = compactMessagesByCount(messages, 80, 82);
    // If we'd only remove 2-3 messages, should return null
    if (result) {
      expect(result.removedCount).toBeGreaterThan(2);
    }
  });
});
describe("compactMessagesByTokens", () => {
it("should return null when under token limit", () => {
const messages = createMessages(5);
@ -161,41 +70,16 @@ describe("compaction", () => {
});
describe("compactMessages (unified entry point)", () => {
describe("count mode", () => {
it("should use count-based compaction", () => {
const messages = createMessages(100);
const result = compactMessages(messages, {
mode: "count",
maxMessages: 80,
keepLast: 60,
});
expect(result).not.toBeNull();
expect(result!.reason).toBe("count");
});
it("should use default max and keep values", () => {
const messages = createMessages(100);
const result = compactMessages(messages, {
mode: "count",
});
// Default: maxMessages: 80, keepLast: 60
expect(result).not.toBeNull();
expect(result!.reason).toBe("count");
});
});
describe("tokens mode", () => {
it("should use token-based compaction when utilization is high", () => {
const messages = createMessages(100);
// ~300 message tokens (real estimator: ~3 tokens/msg)
// systemPromptTokens ≈ 7, reserveTokens = 0
// available = 500 - 7 = 493
// utilization = (300 * 1.5) / 493 ≈ 0.91 > 0.8 → should compact
// systemPromptTokens ≈ 4, reserveTokens = 0
// available = 400 - 4 = 396
// utilization = (300 * 1.2) / 396 ≈ 0.91 > 0.8 → should compact
const result = compactMessages(messages, {
mode: "tokens",
contextWindowTokens: 500,
contextWindowTokens: 400,
systemPrompt: "System prompt",
reserveTokens: 0,
});
@ -207,8 +91,8 @@ describe("compaction", () => {
it("should return null when utilization is low", () => {
const messages = createMessages(5);
// ~15 message tokens
// available = 10000 - 7 - 1024 = 8969
// utilization = (15 * 1.5) / 8969 ≈ 0.003 < 0.8
// available = 10000 - 4 - 1024 = 8972
// utilization = (15 * 1.2) / 8972 ≈ 0.002 < 0.8
const result = compactMessages(messages, {
mode: "tokens",
contextWindowTokens: 10000,

View file

@ -1,11 +1,9 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import {
estimateMessagesTokens,
compactMessagesTokenAware,
estimateTokenUsage,
shouldCompact as shouldCompactTokens,
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
COMPACTION_TARGET_RATIO,
MIN_KEEP_MESSAGES,
@ -30,96 +28,12 @@ export type CompactionResult = {
fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined;
/** Tool failures extracted from compacted messages */
toolFailures?: Array<{ toolName: string; summary: string }> | undefined;
/** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */
reason: "count" | "tokens" | "summary" | "pruning";
/** Reason for compaction: tokens, summary, or pruning (tool result trimming only) */
reason: "tokens" | "summary" | "pruning";
/** Tool result pruning statistics (when Phase 1 pruning was applied) */
pruningStats?: PruningStats | undefined;
};
/**
 * Locate the first index at or after `targetStart` from which messages can be
 * kept without separating a tool_use from its tool_result.
 *
 * A position is accepted when it holds either
 *  - a user message that carries no tool_result block, or
 *  - an assistant message immediately followed by a user message that carries a
 *    tool_result block (so the pair stays together).
 *
 * @param messages - Full message history.
 * @param targetStart - Preferred index to start keeping from.
 * @returns The accepted index; when none is found the scan position is returned,
 *          which is then at or beyond `messages.length`.
 */
function findSafeCompactionPoint(messages: AgentMessage[], targetStart: number): number {
  // True when the message content is an array containing a tool_result block.
  const carriesToolResult = (candidate: unknown): boolean => {
    const content = (candidate as { content?: unknown }).content;
    return Array.isArray(content) && content.some((block: any) => block.type === "tool_result");
  };

  let index = targetStart;
  for (; index < messages.length; index++) {
    const current = messages[index];
    if (!current) {
      continue; // Nothing at this position (e.g. index below zero); keep scanning.
    }

    // A user message is a safe start unless it is a tool_result reply.
    if (current.role === "user" && !carriesToolResult(current)) {
      break;
    }

    // An assistant message is a safe start only when the next message is the
    // tool_result that depends on it.
    if (current.role === "assistant") {
      const following = messages[index + 1];
      if (following && following.role === "user" && carriesToolResult(following)) {
        break;
      }
    }
  }

  return index;
}
/**
 * Legacy count-based compaction (kept for backward compatibility).
 *
 * When the history holds more than `maxMessages` messages, drops the oldest
 * ones so that roughly the last `keepLast` remain, moving the cut forward to
 * the position chosen by findSafeCompactionPoint.
 *
 * @param messages - Full message history.
 * @param maxMessages - Compaction only happens above this message count.
 * @param keepLast - Target number of trailing messages to retain.
 * @returns The kept messages with removal stats, or null when no compaction
 *          applies (under the limit, no safe cut point, or at most two
 *          messages would be removed).
 */
export function compactMessagesByCount(
  messages: AgentMessage[],
  maxMessages: number,
  keepLast: number,
): CompactionResult | null {
  const total = messages.length;
  if (total <= maxMessages) {
    return null;
  }

  // No safe cut point inside the history: leave it untouched.
  const keepFrom = findSafeCompactionPoint(messages, total - keepLast);
  if (keepFrom >= total) {
    return null;
  }

  // Removing two messages or fewer is not worth a compaction.
  const kept = messages.slice(keepFrom);
  const removedCount = total - kept.length;
  if (removedCount <= 2) {
    return null;
  }

  return { kept, removedCount, reason: "count" };
}
/**
* Token-based intelligent compression
*/
@ -143,13 +57,9 @@ export function compactMessagesByTokens(
};
}
/** Synchronous compaction options (count/tokens modes) */
export type SyncCompactionOptions = {
mode: "count" | "tokens";
// count mode parameters
maxMessages?: number | undefined;
keepLast?: number | undefined;
// tokens mode parameters
/** Token-based compaction options */
export type TokenCompactionOptions = {
mode: "tokens";
contextWindowTokens?: number | undefined;
systemPrompt?: string | undefined;
reserveTokens?: number | undefined;
@ -163,39 +73,28 @@ export type SummaryCompactionOptions = {
// Required parameters
model: Model<any>;
apiKey: string;
// tokens mode parameters (reused)
// Token parameters (reused)
contextWindowTokens?: number | undefined;
systemPrompt?: string | undefined;
reserveTokens?: number | undefined;
targetRatio?: number | undefined;
minKeepMessages?: number | undefined;
// summary-specific parameters
// Summary-specific parameters
customInstructions?: string | undefined;
previousSummary?: string | undefined;
signal?: AbortSignal | undefined;
maxChunkTokens?: number | undefined;
};
export type CompactionOptions = SyncCompactionOptions | SummaryCompactionOptions;
export type CompactionOptions = TokenCompactionOptions | SummaryCompactionOptions;
/**
* Unified compaction entry point (synchronous version, for count/tokens modes)
*
* Selects compaction strategy based on mode
* Synchronous token-based compaction
*/
export function compactMessages(
messages: AgentMessage[],
options: SyncCompactionOptions,
options: TokenCompactionOptions,
): CompactionResult | null {
if (options.mode === "count") {
return compactMessagesByCount(
messages,
options.maxMessages ?? 80,
options.keepLast ?? 60,
);
}
// Token mode
const contextWindowTokens = options.contextWindowTokens ?? 200_000;
const estimation = estimateTokenUsage({
messages,
@ -204,7 +103,6 @@ export function compactMessages(
reserveTokens: options.reserveTokens,
});
// Check whether compaction is needed
if (!shouldCompactTokens(estimation)) {
return null;
}

View file

@ -33,12 +33,8 @@ export type SessionManagerOptions = {
baseDir?: string | undefined;
// Compaction mode configuration
/** Compaction mode: "count" uses message count, "tokens" uses token awareness, "summary" uses LLM summary */
compactionMode?: "count" | "tokens" | "summary" | undefined;
// Count mode parameters
maxMessages?: number | undefined;
keepLast?: number | undefined;
/** Compaction mode: "tokens" uses token awareness, "summary" uses LLM summary (default) */
compactionMode?: "tokens" | "summary" | undefined;
// Token mode parameters
/** Context window token count */
@ -61,7 +57,7 @@ export type SessionManagerOptions = {
customInstructions?: string | undefined;
// Tool result pruning
/** Whether to enable tool result pruning before compaction (default: true in tokens/summary mode) */
/** Whether to enable tool result pruning before compaction (default: true) */
enableToolResultPruning?: boolean | undefined;
/** Tool result pruning settings */
toolResultPruning?: Partial<ToolResultPruningSettings> | undefined;
@ -74,10 +70,7 @@ export type SessionManagerOptions = {
export class SessionManager {
private readonly sessionId: string;
private readonly baseDir: string | undefined;
private readonly compactionMode: "count" | "tokens" | "summary";
// Count mode
private readonly maxMessages: number;
private readonly keepLast: number;
private readonly compactionMode: "tokens" | "summary";
// Token mode
private readonly contextWindowTokens: number;
private systemPrompt: string | undefined;
@ -105,10 +98,6 @@ export class SessionManager {
// Compaction mode (default: summary with LLM-based summarization)
this.compactionMode = options.compactionMode ?? "summary";
// Count mode parameters
this.maxMessages = options.maxMessages ?? 80;
this.keepLast = options.keepLast ?? 60;
// Token mode parameters
this.contextWindowTokens = options.contextWindowTokens ?? 200_000;
this.systemPrompt = options.systemPrompt;
@ -121,10 +110,8 @@ export class SessionManager {
this.apiKey = options.apiKey;
this.customInstructions = options.customInstructions;
// Tool result pruning (enabled by default in tokens/summary mode)
this.enableToolResultPruning =
options.enableToolResultPruning ??
(this.compactionMode === "tokens" || this.compactionMode === "summary");
// Tool result pruning (enabled by default)
this.enableToolResultPruning = options.enableToolResultPruning ?? true;
this.toolResultPruning = options.toolResultPruning;
// Observability
@ -164,7 +151,7 @@ export class SessionManager {
/**
* Get current compaction mode
*/
getCompactionMode(): "count" | "tokens" | "summary" {
getCompactionMode(): "tokens" | "summary" {
return this.compactionMode;
}
@ -264,10 +251,6 @@ export class SessionManager {
/** Check whether compaction would trigger for the given messages (without executing it) */
needsCompaction(messages: AgentMessage[]): boolean {
if (this.compactionMode === "count") {
return messages.length > this.maxMessages;
}
// Token and summary modes use the same token-based threshold
const estimation = estimateTokenUsage({
messages,
systemPrompt: this.systemPrompt,
@ -376,12 +359,9 @@ export class SessionManager {
}
}
} else {
// tokens mode
result = compactMessages(workingMessages, {
mode: this.compactionMode,
// Count mode parameters
maxMessages: this.maxMessages,
keepLast: this.keepLast,
// Token mode parameters
mode: "tokens",
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,

View file

@ -50,11 +50,10 @@ export type AgentOptions = {
reserveTokens?: number | undefined;
/**
* Compaction mode:
* - "count": uses legacy message count
* - "tokens": uses token awareness
* - "summary": uses LLM to generate summary (default)
*/
compactionMode?: "count" | "tokens" | "summary" | undefined;
compactionMode?: "tokens" | "summary" | undefined;
/** Compaction target utilization ratio (0-1), defaults to 0.5 */
compactionTargetRatio?: number | undefined;
/** Minimum messages to keep, defaults to 10 */