Merge pull request #95 from multica-ai/Bohan-J/tool-result-pruning

feat(context): add tool result pruning for smarter context management
This commit is contained in:
Bohan Jiang 2026-02-05 16:28:41 +08:00 committed by GitHub
commit 363c65b392
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 871 additions and 12 deletions

View file

@ -44,3 +44,13 @@ export {
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
} from "./summarization.js";
// Tool result pruning
export type {
ToolResultPruningSettings,
ToolResultPruningResult,
} from "./tool-result-pruning.js";
export {
DEFAULT_TOOL_RESULT_PRUNING_SETTINGS,
pruneToolResults,
} from "./tool-result-pruning.js";

View file

@ -0,0 +1,285 @@
import { describe, it, expect } from "vitest";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { pruneToolResults, DEFAULT_TOOL_RESULT_PRUNING_SETTINGS } from "./tool-result-pruning.js";
/**
 * Test helper: builds a user-role message whose only content block is a
 * `tool_result` carrying a single text part.
 */
function createToolResultMessage(
  toolName: string,
  content: string,
  toolUseId: string = "tool-123",
): AgentMessage {
  const textPart = { type: "text", text: content };
  const toolResultBlock = {
    type: "tool_result",
    tool_use_id: toolUseId,
    name: toolName,
    content: [textPart],
  };
  const message = { role: "user", content: [toolResultBlock] };
  return message as unknown as AgentMessage;
}
/** Test helper: builds an assistant-role message holding one text block. */
function createAssistantMessage(text: string): AgentMessage {
  const textBlock = { type: "text", text };
  const message = { role: "assistant", content: [textBlock] };
  return message as unknown as AgentMessage;
}
/** Test helper: builds a user-role message whose content is a plain string. */
function createUserMessage(text: string): AgentMessage {
  const message = { role: "user", content: text };
  return message as unknown as AgentMessage;
}
describe("pruneToolResults", () => {
  it("returns unchanged if utilization is below softTrimRatio", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Hi there!"),
      createToolResultMessage("read", "Short content"),
    ];
    const result = pruneToolResults({
      messages,
      contextWindowTokens: 200_000, // Very large window
    });
    expect(result.changed).toBe(false);
    expect(result.messages).toBe(messages);
    expect(result.softTrimmed).toBe(0);
    expect(result.hardCleared).toBe(0);
  });

  it("soft trims large tool results", () => {
    // Create a message with a large tool result (5000 chars)
    const largeContent = "A".repeat(5000);
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Processing..."),
      createToolResultMessage("read", largeContent),
      createAssistantMessage("Done!"),
      createAssistantMessage("Follow up"),
      createAssistantMessage("Another one"),
      createAssistantMessage("Protected message"), // This is protected (keepLastAssistants=3)
    ];
    const result = pruneToolResults({
      messages,
      contextWindowTokens: 10_000, // Small window to trigger pruning
      settings: {
        softTrimRatio: 0.1, // Low threshold to ensure pruning
      },
    });
    expect(result.changed).toBe(true);
    expect(result.softTrimmed).toBe(1);
    // Check that the trimmed message contains head + tail
    const trimmedMsg = result.messages[2] as any;
    const trimmedText = trimmedMsg.content[0].content[0].text;
    expect(trimmedText).toContain("A".repeat(100)); // Should have some head content
    expect(trimmedText).toContain("..."); // Truncation marker
    expect(trimmedText).toContain("[Tool result trimmed:");
  });

  it("hard clears when utilization exceeds hardClearRatio", () => {
    // Create multiple messages with large tool results
    const largeContent = "X".repeat(10000);
    const messages = [
      createUserMessage("Start"),
      createAssistantMessage("Processing 1"),
      createToolResultMessage("read", largeContent, "tool-1"),
      createAssistantMessage("Processing 2"),
      createToolResultMessage("exec", largeContent, "tool-2"),
      createAssistantMessage("Processing 3"),
      createToolResultMessage("glob", largeContent, "tool-3"),
      createAssistantMessage("Done 1"), // Protected
      createAssistantMessage("Done 2"), // Protected
      createAssistantMessage("Done 3"), // Protected
    ];
    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000, // Very small window
      settings: {
        softTrimRatio: 0.1,
        hardClearRatio: 0.2,
        minPrunableToolChars: 1000, // Lower threshold for test
        hardClear: {
          enabled: true,
          placeholder: "[Cleared]",
        },
      },
    });
    expect(result.changed).toBe(true);
    // Should have cleared at least some tool results
    expect(result.hardCleared).toBeGreaterThan(0);
    expect(result.charsSaved).toBeGreaterThan(0);
  });

  it("protects last N assistant messages", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("First"),
      createToolResultMessage("read", "A".repeat(5000), "tool-1"), // Before the protected zone: prunable
      createAssistantMessage("Second"), // Protected (keepLastAssistants=3)
      createToolResultMessage("read", "B".repeat(5000), "tool-2"), // In protected zone, should NOT be pruned
      createAssistantMessage("Third"), // Protected
      createAssistantMessage("Fourth"), // Protected
    ];
    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
        keepLastAssistants: 3,
      },
    });
    // Assert unconditionally so the test cannot pass without pruning having run.
    expect(result.changed).toBe(true);
    expect(result.softTrimmed).toBe(1);
    // tool-1 sits before the protected zone and must have been trimmed
    const tool1Msg = result.messages[2] as any;
    expect(tool1Msg.content[0].content[0].text).toContain("[Tool result trimmed:");
    // tool-2 sits after the "Second" assistant message and must be untouched
    const tool2Msg = result.messages[4] as any;
    const tool2Content = tool2Msg.content[0].content[0].text;
    expect(tool2Content).toBe("B".repeat(5000)); // Unchanged
  });

  it("never prunes before first user message", () => {
    const messages = [
      createAssistantMessage("Bootstrap read"), // Before first user message
      createToolResultMessage("read", "A".repeat(5000), "tool-1"), // Should NOT be pruned
      createUserMessage("Hello"), // First user message
      createAssistantMessage("Response"),
      createToolResultMessage("read", "B".repeat(5000), "tool-2"), // Can be pruned
      createAssistantMessage("Done 1"),
      createAssistantMessage("Done 2"),
      createAssistantMessage("Done 3"),
    ];
    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
      },
    });
    // Pruning must actually have run, otherwise the check below proves nothing
    expect(result.changed).toBe(true);
    const tool2Msg = result.messages[4] as any;
    expect(tool2Msg.content[0].content[0].text).toContain("[Tool result trimmed:");
    // The first tool result (before first user message) should NOT be modified
    const tool1Msg = result.messages[1] as any;
    const tool1Content = tool1Msg.content[0].content[0].text;
    expect(tool1Content).toBe("A".repeat(5000)); // Unchanged - bootstrap protection
  });

  it("respects tool deny list", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Processing"),
      createToolResultMessage("read", "A".repeat(5000), "tool-1"),
      createAssistantMessage("Done 1"),
      createAssistantMessage("Done 2"),
      createAssistantMessage("Done 3"),
    ];
    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
        tools: {
          deny: ["read"], // Don't prune read tool results
        },
      },
    });
    // read tool should not be pruned
    expect(result.changed).toBe(false);
  });

  it("respects tool allow list", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Processing"),
      createToolResultMessage("read", "A".repeat(5000), "tool-1"),
      createToolResultMessage("exec", "B".repeat(5000), "tool-2"),
      createAssistantMessage("Done 1"),
      createAssistantMessage("Done 2"),
      createAssistantMessage("Done 3"),
    ];
    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
        tools: {
          allow: ["exec"], // Only prune exec tool results
        },
      },
    });
    // Assert unconditionally so the test cannot pass without pruning having run.
    expect(result.changed).toBe(true);
    expect(result.softTrimmed).toBe(1);
    // read tool is not on the allow list and must be untouched
    const tool1Msg = result.messages[2] as any;
    const tool1Content = tool1Msg.content[0].content[0].text;
    expect(tool1Content).toBe("A".repeat(5000)); // Unchanged
    // exec tool is on the allow list and must have been trimmed
    const tool2Msg = result.messages[3] as any;
    expect(tool2Msg.content[0].content[0].text).toContain("[Tool result trimmed:");
  });

  it("skips tool results with images", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Processing"),
      {
        role: "user",
        content: [
          {
            type: "tool_result",
            tool_use_id: "tool-1",
            name: "screenshot",
            content: [
              { type: "image", source: { type: "base64", data: "abc123" } },
              { type: "text", text: "A".repeat(5000) },
            ],
          },
        ],
      } as unknown as AgentMessage,
      createAssistantMessage("Done 1"),
      createAssistantMessage("Done 2"),
      createAssistantMessage("Done 3"),
    ];
    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
      },
    });
    // Image-containing tool result should not be pruned
    expect(result.softTrimmed).toBe(0);
    expect(result.hardCleared).toBe(0);
  });
});
describe("DEFAULT_TOOL_RESULT_PRUNING_SETTINGS", () => {
  // Pins every default so an accidental change to the shipped settings is caught.
  it("has expected default values", () => {
    const defaults = DEFAULT_TOOL_RESULT_PRUNING_SETTINGS;
    expect(defaults.softTrimRatio).toBe(0.3);
    expect(defaults.hardClearRatio).toBe(0.5);
    expect(defaults.minPrunableToolChars).toBe(50_000);
    expect(defaults.keepLastAssistants).toBe(3);
    expect(defaults.softTrim.maxChars).toBe(4000);
    expect(defaults.softTrim.headChars).toBe(1500);
    expect(defaults.softTrim.tailChars).toBe(1500);
    expect(defaults.hardClear.enabled).toBe(true);
    expect(defaults.hardClear.placeholder).toBe("[Tool result cleared to save context space]");
  });
});

View file

@ -0,0 +1,510 @@
/**
* Tool Result Pruning
*
* Smart pruning of tool results to reduce context window usage while preserving
* useful information. Implements two-phase pruning:
*
* 1. Soft Trim: Keep head + tail of large tool results
* 2. Hard Clear: Replace old tool results with placeholder
*
* Based on OpenClaw's microcompact-style context pruning.
*/
import type { AgentMessage } from "@mariozechner/pi-agent-core";
// ─── Types ───────────────────────────────────────────────────────────────────
/**
* Tuning knobs for tool result pruning.
*
* "Utilization" below means the estimated character count of all messages
* divided by the character window (contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE).
*/
export type ToolResultPruningSettings = {
/** Utilization ratio at or above which soft trimming runs (default: 0.3) */
softTrimRatio: number;
/** Utilization ratio, re-measured after soft trimming, at or above which hard clearing runs (default: 0.5) */
hardClearRatio: number;
/** Minimum estimated chars held by the prunable tool-result messages for hard clear to be attempted (default: 50000) */
minPrunableToolChars: number;
/** Number of most recent assistant messages to protect; pruning stops at the oldest of them (default: 3) */
keepLastAssistants: number;
/** Soft trim settings */
softTrim: {
/** Results whose text is at most this many chars are never soft-trimmed (default: 4000) */
maxChars: number;
/** Chars to keep from the start of a trimmed result (default: 1500) */
headChars: number;
/** Chars to keep from the end of a trimmed result (default: 1500) */
tailChars: number;
};
/** Hard clear settings */
hardClear: {
/** Whether hard clear is enabled (default: true) */
enabled: boolean;
/** Text that replaces the content of a cleared result */
placeholder: string;
};
/**
* Optional tool-name filters. A name listed in `deny` is never pruned; when
* `allow` is non-empty, only names listed in it are pruned.
*/
tools?: {
allow?: string[];
deny?: string[];
};
};
/**
* Settings used by pruneToolResults for every field the caller does not override.
* No `tools` filter is set, so every tool name is eligible for pruning by default.
*/
export const DEFAULT_TOOL_RESULT_PRUNING_SETTINGS: ToolResultPruningSettings = {
softTrimRatio: 0.3,
hardClearRatio: 0.5,
minPrunableToolChars: 50_000,
keepLastAssistants: 3,
softTrim: {
maxChars: 4_000,
headChars: 1_500,
tailChars: 1_500,
},
hardClear: {
enabled: true,
placeholder: "[Tool result cleared to save context space]",
},
};
/** Outcome of a pruneToolResults call. */
export type ToolResultPruningResult = {
/** Messages after pruning */
messages: AgentMessage[];
/** Whether any message was modified */
changed: boolean;
/** Number of messages that had at least one tool result soft-trimmed */
softTrimmed: number;
/** Number of messages that had their tool results hard-cleared */
hardCleared: number;
/** Estimated chars removed, summed over both phases */
charsSaved: number;
};
// ─── Constants ───────────────────────────────────────────────────────────────
/** Multiplier that converts the context window size in tokens into an approximate size in characters. */
const CHARS_PER_TOKEN_ESTIMATE = 4;
/** Flat character cost charged for each image block when estimating a user message's size. */
const IMAGE_CHAR_ESTIMATE = 8_000;
// ─── Helper Functions ────────────────────────────────────────────────────────
/**
 * Collect the textual part of a tool result's `content`.
 *
 * A string is returned as-is. For an array, bare string entries and the
 * string `text` property of object entries are joined with newlines; every
 * other entry is ignored. Any other input yields the empty string.
 */
function extractToolResultText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  // Pull the text out of a single entry, or undefined when it has none.
  const textOf = (entry: unknown): unknown => {
    if (typeof entry === "string") return entry;
    if (entry === null || typeof entry !== "object") return undefined;
    return (entry as { text?: unknown }).text;
  };

  const pieces: string[] = [];
  content.forEach((entry: unknown) => {
    const piece = textOf(entry);
    if (typeof piece === "string") pieces.push(piece);
  });
  return pieces.join("\n");
}
/**
 * Report whether `content` is an array holding at least one object entry
 * whose `type` is `"image"`.
 */
function hasImageContent(content: unknown): boolean {
  return (
    Array.isArray(content) &&
    content.some(
      (entry: unknown) =>
        entry !== null &&
        typeof entry === "object" &&
        (entry as { type?: unknown }).type === "image",
    )
  );
}
/**
 * Estimate how many characters a message contributes to the context.
 *
 * - user: text blocks, the text of tool results, and a flat cost per image block
 * - assistant: text blocks, thinking blocks, and the JSON-serialised arguments
 *   of tool calls (128 if they cannot be serialised)
 * - any other role: a flat 256
 *
 * String content counts its own length; content that is neither a string nor
 * an array counts as 0.
 */
function estimateMessageChars(message: AgentMessage): number {
  const role = message.role;
  if (role !== "user" && role !== "assistant") return 256;

  const content = (message as any).content;
  if (typeof content === "string") return content.length;
  if (!Array.isArray(content)) return 0;

  const fromUser = role === "user";
  let total = 0;
  for (const part of content) {
    if (typeof part === "string") {
      total += part.length;
      continue;
    }
    if (!part || typeof part !== "object") continue;

    // Text blocks are counted the same way for both roles.
    if (part.type === "text" && typeof part.text === "string") {
      total += part.text.length;
      continue;
    }

    if (fromUser) {
      if (part.type === "tool_result") {
        total += extractToolResultText(part.content).length;
      } else if (part.type === "image") {
        total += IMAGE_CHAR_ESTIMATE;
      }
      continue;
    }

    if (part.type === "thinking" && typeof part.thinking === "string") {
      total += part.thinking.length;
    } else if (part.type === "toolCall" || part.type === "tool_use") {
      try {
        total += JSON.stringify(part.arguments ?? part.input ?? {}).length;
      } catch {
        // Arguments that cannot be serialised get a fixed nominal cost.
        total += 128;
      }
    }
  }
  return total;
}
/**
 * Sum the per-message character estimates over the whole conversation.
 */
function estimateContextChars(messages: AgentMessage[]): number {
  let total = 0;
  messages.forEach((message) => {
    total += estimateMessageChars(message);
  });
  return total;
}
/**
 * Locate the boundary of the protected tail of the conversation.
 *
 * Scans from the end and returns the index of the `keepLastAssistants`-th
 * assistant message encountered. Returns `messages.length` when
 * `keepLastAssistants` is zero or negative, and `null` when the conversation
 * holds fewer assistant messages than requested.
 */
function findAssistantCutoffIndex(
  messages: AgentMessage[],
  keepLastAssistants: number,
): number | null {
  if (keepLastAssistants <= 0) return messages.length;

  let assistantsSeen = 0;
  let index = messages.length;
  while (index-- > 0) {
    if (messages[index]?.role !== "assistant") continue;
    assistantsSeen += 1;
    if (assistantsSeen === keepLastAssistants) return index;
  }
  return null;
}
/**
 * Decide whether a message is genuine user input rather than a carrier for
 * tool results (which also travel with the user role).
 *
 * A user message counts as real unless its content is an array in which no
 * entry is a bare string or an object of a type other than `tool_result`.
 */
function isRealUserMessage(message: AgentMessage): boolean {
  if (message.role !== "user") return false;

  const content = (message as any).content;
  // String content, and any other non-array content, is treated as real input.
  if (!Array.isArray(content)) return true;

  return content.some(
    (part: any) =>
      typeof part === "string" ||
      (part !== null && typeof part === "object" && part.type !== "tool_result"),
  );
}
/**
 * Return the position of the first genuine user message, or `null` when the
 * conversation has none. Pruning never touches anything before this index,
 * which keeps bootstrap reads intact.
 */
function findFirstUserIndex(messages: AgentMessage[]): number | null {
  const position = messages.findIndex((candidate) => Boolean(candidate) && isRealUserMessage(candidate));
  return position === -1 ? null : position;
}
/**
 * Decide whether results of the named tool may be pruned.
 *
 * The deny list wins over everything. Otherwise a non-empty allow list
 * restricts pruning to the names it contains; with no allow list (or an
 * empty one) every tool is prunable.
 */
function isToolPrunable(toolName: string, settings: ToolResultPruningSettings): boolean {
  const denied = settings.tools?.deny ?? [];
  if (denied.includes(toolName)) return false;

  const allowed = settings.tools?.allow ?? [];
  return allowed.length === 0 || allowed.includes(toolName);
}
/**
 * Return at most the first `maxChars` characters of `text`.
 * A zero or negative `maxChars` yields the empty string.
 */
function takeHead(text: string, maxChars: number): string {
  if (maxChars <= 0) return "";
  if (text.length <= maxChars) return text;
  return text.slice(0, maxChars);
}

/**
 * Return at most the last `maxChars` characters of `text`.
 * A zero or negative `maxChars` yields the empty string.
 */
function takeTail(text: string, maxChars: number): string {
  if (maxChars <= 0) return "";
  if (text.length <= maxChars) return text;
  return text.slice(text.length - maxChars);
}

/**
 * Soft trim a tool result text: keep its head and tail, drop the middle, and
 * append a note describing what was kept.
 *
 * @param text - Full text of the tool result.
 * @param settings - Pruning settings; only `settings.softTrim` is read.
 * @returns The trimmed text and the number of characters saved, or `null`
 *   when the text is left alone: it is no longer than `maxChars`, the head and
 *   tail together would cover it entirely, or the trimmed form (including the
 *   markers and note) would not be shorter than the original.
 */
function softTrimText(
  text: string,
  settings: ToolResultPruningSettings,
): { trimmed: string; saved: number } | null {
  const { maxChars, headChars, tailChars } = settings.softTrim;
  if (text.length <= maxChars) return null;
  if (headChars + tailChars >= text.length) return null;

  const head = takeHead(text, headChars);
  const tail = takeTail(text, tailChars);
  const note = `\n\n[Tool result trimmed: kept first ${headChars} chars and last ${tailChars} chars of ${text.length} chars.]`;
  const trimmed = `${head}\n...\n${tail}${note}`;

  // The separator and note add overhead; with small limits the "trimmed"
  // text can end up longer than the original, so skip trims that save nothing.
  const saved = text.length - trimmed.length;
  if (saved <= 0) return null;

  return { trimmed, saved };
}
/**
 * Prune the tool results carried by a single user message.
 *
 * Every `tool_result` block is examined; all other blocks pass through
 * untouched. A block is skipped when its tool is excluded by the allow/deny
 * settings or when its content includes an image.
 *
 * @param message - The message to process. It is never mutated.
 * @param settings - Fully resolved pruning settings.
 * @param mode - `"soft"` keeps the head and tail of over-long results;
 *   `"hard"` replaces the result text with `settings.hardClear.placeholder`.
 * @returns The original message (same reference) when nothing changed,
 *   otherwise a shallow copy with rewritten content, together with the
 *   number of characters saved.
 */
function processUserMessageToolResults(
  message: AgentMessage,
  settings: ToolResultPruningSettings,
  mode: "soft" | "hard",
): { message: AgentMessage; changed: boolean; charsSaved: number } {
  const content = (message as any).content;
  if (!Array.isArray(content)) {
    return { message, changed: false, charsSaved: 0 };
  }

  const placeholder = settings.hardClear.placeholder;
  let changed = false;
  let charsSaved = 0;
  const newContent: any[] = [];

  for (const block of content) {
    if (!block || typeof block !== "object" || block.type !== "tool_result") {
      newContent.push(block);
      continue;
    }

    const toolName = block.name ?? "unknown";
    // Skip tools excluded by the allow/deny lists and results that carry images.
    if (!isToolPrunable(toolName, settings) || hasImageContent(block.content)) {
      newContent.push(block);
      continue;
    }

    const originalText = extractToolResultText(block.content);
    let replacement: string | null = null;
    let saved = 0;

    if (mode === "soft") {
      const result = softTrimText(originalText, settings);
      if (result) {
        replacement = result.trimmed;
        saved = result.saved;
      }
    } else if (originalText.length > placeholder.length) {
      // Hard clear only when it shrinks the result. This leaves alone results
      // that were already cleared on an earlier pass (their text equals the
      // placeholder) and tiny results that the placeholder would enlarge.
      replacement = placeholder;
      saved = originalText.length - placeholder.length;
    }

    if (replacement === null) {
      newContent.push(block);
      continue;
    }

    newContent.push({
      ...block,
      content: [{ type: "text", text: replacement }],
    });
    changed = true;
    charsSaved += saved;
  }

  if (!changed) {
    return { message, changed: false, charsSaved: 0 };
  }
  return {
    message: { ...message, content: newContent } as AgentMessage,
    changed: true,
    charsSaved,
  };
}
// ─── Main Functions ──────────────────────────────────────────────────────────
/**
 * Prune tool results in messages to reduce context window usage.
 *
 * Two-phase approach:
 * 1. Soft Trim (utilization >= softTrimRatio): keep head + tail of large tool results
 * 2. Hard Clear (utilization still >= hardClearRatio after phase 1): replace tool
 *    results with a placeholder, oldest message first, stopping as soon as
 *    utilization drops below hardClearRatio
 *
 * Protections:
 * - Never prunes before the first real user message (protects bootstrap/identity reads)
 * - Never prunes at or after the oldest of the last N assistant messages
 * - Does nothing when fewer than N assistant messages exist
 * - Skips image-containing tool results
 * - Respects tool allow/deny lists
 *
 * @param params.messages - Conversation to prune. Neither the array nor its messages are mutated.
 * @param params.contextWindowTokens - Context window size in tokens; a value that is
 *   zero, negative, or NaN disables pruning.
 * @param params.settings - Overrides merged over DEFAULT_TOOL_RESULT_PRUNING_SETTINGS
 *   (`softTrim` and `hardClear` are merged field by field).
 * @returns The resulting messages plus counters. `softTrimmed` and `hardCleared`
 *   count messages, not individual tool results. When nothing changed, the
 *   original `messages` array is returned as-is.
 */
export function pruneToolResults(params: {
  messages: AgentMessage[];
  contextWindowTokens: number;
  settings?: Partial<ToolResultPruningSettings>;
}): ToolResultPruningResult {
  const { messages, contextWindowTokens } = params;
  const settings: ToolResultPruningSettings = {
    ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS,
    ...params.settings,
    softTrim: {
      ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim,
      ...params.settings?.softTrim,
    },
    hardClear: {
      ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.hardClear,
      ...params.settings?.hardClear,
    },
  };
  const unchanged: ToolResultPruningResult = {
    messages,
    changed: false,
    softTrimmed: 0,
    hardCleared: 0,
    charsSaved: 0,
  };

  // A NaN window would make every ratio comparison false and trim everything,
  // so treat it like a non-positive window.
  const charWindow = contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE;
  if (Number.isNaN(charWindow) || charWindow <= 0) return unchanged;

  // Find cutoff index for protected assistant messages
  const cutoffIndex = findAssistantCutoffIndex(messages, settings.keepLastAssistants);
  if (cutoffIndex === null) return unchanged;

  // Never prune before first user message (bootstrap protection)
  const firstUserIndex = findFirstUserIndex(messages);
  const pruneStartIndex = firstUserIndex === null ? messages.length : firstUserIndex;

  // Calculate current utilization
  let totalChars = estimateContextChars(messages);
  let ratio = totalChars / charWindow;

  // No pruning needed
  if (ratio < settings.softTrimRatio) return unchanged;

  const result = messages.slice();
  let changed = false;
  let softTrimmed = 0;
  let hardCleared = 0;
  let charsSaved = 0;

  // Indexes of messages in the prunable range that carry tool results
  const prunableIndexes: number[] = [];

  // Phase 1: Soft Trim
  for (let i = pruneStartIndex; i < cutoffIndex; i++) {
    const msg = result[i];
    if (!msg || msg.role !== "user") continue;
    const msgContent = (msg as any).content;
    if (!Array.isArray(msgContent)) continue;

    const hasToolResult = msgContent.some(
      (b: any) => b && typeof b === "object" && b.type === "tool_result",
    );
    if (!hasToolResult) continue;
    prunableIndexes.push(i);

    const processed = processUserMessageToolResults(msg, settings, "soft");
    if (processed.changed) {
      result[i] = processed.message;
      changed = true;
      softTrimmed++;
      charsSaved += processed.charsSaved;
      totalChars -= processed.charsSaved;
    }
  }

  // Recalculate ratio after soft trim
  ratio = totalChars / charWindow;

  // Phase 2: Hard Clear (if needed)
  if (ratio >= settings.hardClearRatio && settings.hardClear.enabled) {
    // Check if we have enough prunable content to make hard clear worthwhile
    let prunableChars = 0;
    for (const i of prunableIndexes) {
      const msg = result[i];
      if (msg) prunableChars += estimateMessageChars(msg);
    }

    if (prunableChars >= settings.minPrunableToolChars) {
      for (const i of prunableIndexes) {
        // Stop as soon as enough space has been reclaimed.
        if (ratio < settings.hardClearRatio) break;
        const msg = result[i];
        if (!msg) continue;
        const processed = processUserMessageToolResults(msg, settings, "hard");
        if (processed.changed) {
          result[i] = processed.message;
          changed = true;
          hardCleared++;
          charsSaved += processed.charsSaved;
          totalChars -= processed.charsSaved;
          ratio = totalChars / charWindow;
        }
      }
    }
  }

  return {
    // Hand back the caller's array when nothing was modified, as the early returns do.
    messages: changed ? result : messages,
    changed,
    softTrimmed,
    hardCleared,
    charsSaved,
  };
}

View file

@ -23,7 +23,7 @@ export type CompactionEndEvent = {
kept: number;
tokensRemoved?: number;
tokensKept?: number;
reason: "count" | "tokens" | "summary";
reason: "count" | "tokens" | "summary" | "pruning";
};
/** Union of all Multica-specific events */

View file

@ -292,10 +292,10 @@ export class ProfileManager {
updateStyle(style: string): void {
const profile = this.getOrCreateProfile(false);
const currentConfig = profile.config ?? {};
const newConfig: ProfileConfig = {
...currentConfig,
// Use Object.assign to avoid exactOptionalPropertyTypes issues with spread
const newConfig: ProfileConfig = Object.assign({}, currentConfig, {
style: style as ProfileConfig["style"],
};
});
profile.config = newConfig;
this.profile = profile;
writeProfileConfig(this.profileId, newConfig, { baseDir: this.baseDir });

View file

@ -19,7 +19,8 @@ export type CompactionResult = {
tokensKept?: number | undefined;
/** Summary generated in summary mode */
summary?: string | undefined;
reason: "count" | "tokens" | "summary";
/** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */
reason: "count" | "tokens" | "summary" | "pruning";
};
/**

View file

@ -2,11 +2,15 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { getModel, type Model } from "@mariozechner/pi-ai";
import type { SessionEntry, SessionMeta } from "./types.js";
import { appendEntry, readEntries, resolveSessionPath, writeEntries } from "./storage.js";
import { compactMessages, compactMessagesAsync } from "./compaction.js";
import { compactMessages, compactMessagesAsync, type CompactionResult } from "./compaction.js";
import { estimateTokenUsage, shouldCompact as shouldCompactTokens } from "../context-window/index.js";
import { credentialManager } from "../credentials.js";
import { repairSessionFileIfNeeded, type RepairReport } from "./session-file-repair.js";
import { sanitizeToolCallInputs, sanitizeToolUseResultPairing } from "./session-transcript-repair.js";
import {
pruneToolResults,
type ToolResultPruningSettings,
} from "../context-window/tool-result-pruning.js";
/** Get Kimi model for summarization (use a cheaper model than k2-thinking) */
function getSummaryModel(): Model<any> {
@ -54,6 +58,12 @@ export type SessionManagerOptions = {
apiKey?: string | undefined;
/** Custom summary instructions */
customInstructions?: string | undefined;
// Tool result pruning
/** Whether to enable tool result pruning before compaction (default: true in tokens/summary mode) */
enableToolResultPruning?: boolean | undefined;
/** Tool result pruning settings */
toolResultPruning?: Partial<ToolResultPruningSettings> | undefined;
};
export class SessionManager {
@ -74,6 +84,9 @@ export class SessionManager {
private apiKey: string | undefined;
private readonly customInstructions: string | undefined;
private previousSummary: string | undefined;
// Tool result pruning
private readonly enableToolResultPruning: boolean;
private readonly toolResultPruning: Partial<ToolResultPruningSettings> | undefined;
private queue: Promise<void> = Promise.resolve();
private meta: SessionMeta | undefined;
@ -101,6 +114,12 @@ export class SessionManager {
this.apiKey = options.apiKey;
this.customInstructions = options.customInstructions;
// Tool result pruning (enabled by default in tokens/summary mode)
this.enableToolResultPruning =
options.enableToolResultPruning ??
(this.compactionMode === "tokens" || this.compactionMode === "summary");
this.toolResultPruning = options.toolResultPruning;
this.meta = this.loadMeta();
}
@ -209,7 +228,33 @@ export class SessionManager {
return shouldCompactTokens(estimation);
}
async maybeCompact(messages: AgentMessage[]) {
async maybeCompact(messages: AgentMessage[]): Promise<CompactionResult | null> {
let workingMessages = messages;
let toolResultPruningApplied = false;
// Phase 1: Tool result pruning (soft trim / hard clear)
// This reduces token usage without removing messages
if (this.enableToolResultPruning) {
const pruneResult = pruneToolResults({
messages: workingMessages,
contextWindowTokens: this.contextWindowTokens,
settings: this.toolResultPruning,
});
if (pruneResult.changed) {
workingMessages = pruneResult.messages;
toolResultPruningApplied = true;
// Log pruning stats
if (pruneResult.softTrimmed > 0 || pruneResult.hardCleared > 0) {
console.error(
`[SessionManager] Tool result pruning: ${pruneResult.softTrimmed} soft-trimmed, ` +
`${pruneResult.hardCleared} hard-cleared, ~${Math.round(pruneResult.charsSaved / 1000)}k chars saved`,
);
}
}
}
// Phase 2: Message compaction (remove old messages if still needed)
let result;
if (this.compactionMode === "summary") {
@ -219,7 +264,7 @@ export class SessionManager {
if (!apiKey) {
// No API key available, downgrade to tokens mode
result = compactMessages(messages, {
result = compactMessages(workingMessages, {
mode: "tokens",
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
@ -228,7 +273,7 @@ export class SessionManager {
minKeepMessages: this.minKeepMessages,
});
} else {
result = await compactMessagesAsync(messages, {
result = await compactMessagesAsync(workingMessages, {
mode: "summary",
model,
apiKey,
@ -247,7 +292,7 @@ export class SessionManager {
}
}
} else {
result = compactMessages(messages, {
result = compactMessages(workingMessages, {
mode: this.compactionMode,
// Count mode parameters
maxMessages: this.maxMessages,
@ -261,7 +306,14 @@ export class SessionManager {
});
}
if (!result) return null;
// If no message compaction needed but tool result pruning was applied,
// still return the pruned messages
if (!result) {
if (toolResultPruningApplied) {
return { kept: workingMessages, removedCount: 0, reason: "pruning" as const };
}
return null;
}
const entries: SessionEntry[] = [];
if (this.meta) {

View file

@ -23,5 +23,5 @@ export type SessionEntry =
tokensKept?: number | undefined;
/** 摘要模式生成的摘要 */
summary?: string | undefined;
reason?: "count" | "tokens" | "summary" | undefined;
reason?: "count" | "tokens" | "summary" | "pruning" | undefined;
};

View file

@ -1,5 +1,6 @@
{
"$schema": "https://turbo.build/schema.json",
"globalDependencies": ["src/**"],
"tasks": {
"build": {
"dependsOn": ["^build"],