Merge pull request #95 from multica-ai/Bohan-J/tool-result-pruning
feat(context): add tool result pruning for smarter context management
This commit is contained in:
commit
363c65b392
9 changed files with 871 additions and 12 deletions
|
|
@ -44,3 +44,13 @@ export {
|
|||
compactMessagesWithSummary,
|
||||
compactMessagesWithChunkedSummary,
|
||||
} from "./summarization.js";
|
||||
|
||||
// Tool result pruning
|
||||
export type {
|
||||
ToolResultPruningSettings,
|
||||
ToolResultPruningResult,
|
||||
} from "./tool-result-pruning.js";
|
||||
export {
|
||||
DEFAULT_TOOL_RESULT_PRUNING_SETTINGS,
|
||||
pruneToolResults,
|
||||
} from "./tool-result-pruning.js";
|
||||
|
|
|
|||
285
src/agent/context-window/tool-result-pruning.test.ts
Normal file
285
src/agent/context-window/tool-result-pruning.test.ts
Normal file
|
|
@ -0,0 +1,285 @@
|
|||
import { describe, it, expect } from "vitest";
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
import { pruneToolResults, DEFAULT_TOOL_RESULT_PRUNING_SETTINGS } from "./tool-result-pruning.js";
|
||||
|
||||
/**
 * Builds a user-role message whose single content block is a `tool_result`
 * carrying `content` as one text block.
 *
 * @param toolName - Value stored in the block's `name` field.
 * @param content - Text placed inside the tool result.
 * @param toolUseId - Value stored in `tool_use_id` (defaults to "tool-123").
 */
function createToolResultMessage(
  toolName: string,
  content: string,
  toolUseId: string = "tool-123",
): AgentMessage {
  const textBlock = { type: "text", text: content };
  const toolResultBlock = {
    type: "tool_result",
    tool_use_id: toolUseId,
    name: toolName,
    content: [textBlock],
  };
  const message = { role: "user", content: [toolResultBlock] };
  // The fixture shape is looser than AgentMessage, hence the double cast.
  return message as unknown as AgentMessage;
}
|
||||
|
||||
/**
 * Builds an assistant-role message containing a single text block.
 *
 * @param text - Text of the assistant reply.
 */
function createAssistantMessage(text: string): AgentMessage {
  const message = {
    role: "assistant",
    content: [{ type: "text", text }],
  };
  return message as unknown as AgentMessage;
}
|
||||
|
||||
/**
 * Builds a user-role message whose content is the plain string `text`.
 *
 * @param text - The user's input.
 */
function createUserMessage(text: string): AgentMessage {
  const message = { role: "user", content: text };
  return message as unknown as AgentMessage;
}
|
||||
|
||||
describe("pruneToolResults", () => {
  // Reads the text of the first text block inside the first tool_result of
  // the message at `index`.
  const toolResultTextAt = (msgs: AgentMessage[], index: number): string =>
    (msgs[index] as any).content[0].content[0].text;

  it("returns unchanged if utilization is below softTrimRatio", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Hi there!"),
      createToolResultMessage("read", "Short content"),
    ];

    const result = pruneToolResults({
      messages,
      contextWindowTokens: 200_000, // Very large window
    });

    expect(result.changed).toBe(false);
    expect(result.messages).toBe(messages);
    expect(result.softTrimmed).toBe(0);
    expect(result.hardCleared).toBe(0);
  });

  it("soft trims large tool results", () => {
    // Create a message with a large tool result (5000 chars)
    const largeContent = "A".repeat(5000);
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Processing..."),
      createToolResultMessage("read", largeContent),
      createAssistantMessage("Done!"),
      createAssistantMessage("Follow up"), // Protected (keepLastAssistants=3)
      createAssistantMessage("Another one"), // Protected
      createAssistantMessage("Protected message"), // Protected
    ];

    const result = pruneToolResults({
      messages,
      contextWindowTokens: 10_000, // Small window to trigger pruning
      settings: {
        softTrimRatio: 0.1, // Low threshold to ensure pruning
      },
    });

    expect(result.changed).toBe(true);
    expect(result.softTrimmed).toBe(1);

    // Check that the trimmed message contains head + tail
    const trimmedText = toolResultTextAt(result.messages, 2);
    expect(trimmedText).toContain("A".repeat(100)); // Should have some head content
    expect(trimmedText).toContain("..."); // Truncation marker
    expect(trimmedText).toContain("[Tool result trimmed:");
  });

  it("hard clears when utilization exceeds hardClearRatio", () => {
    // Create multiple messages with large tool results
    const largeContent = "X".repeat(10000);
    const messages = [
      createUserMessage("Start"),
      createAssistantMessage("Processing 1"),
      createToolResultMessage("read", largeContent, "tool-1"),
      createAssistantMessage("Processing 2"),
      createToolResultMessage("exec", largeContent, "tool-2"),
      createAssistantMessage("Processing 3"),
      createToolResultMessage("glob", largeContent, "tool-3"),
      createAssistantMessage("Done 1"), // Protected
      createAssistantMessage("Done 2"), // Protected
      createAssistantMessage("Done 3"), // Protected
    ];

    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000, // Very small window
      settings: {
        softTrimRatio: 0.1,
        hardClearRatio: 0.2,
        minPrunableToolChars: 1000, // Lower threshold for test
        hardClear: {
          enabled: true,
          placeholder: "[Cleared]",
        },
      },
    });

    expect(result.changed).toBe(true);
    // Should have cleared at least some tool results
    expect(result.hardCleared).toBeGreaterThan(0);
    expect(result.charsSaved).toBeGreaterThan(0);
  });

  it("protects last N assistant messages", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("First"),
      createToolResultMessage("read", "A".repeat(5000), "tool-1"), // Should be prunable
      createAssistantMessage("Second"), // Protected (keepLastAssistants=3)
      createToolResultMessage("read", "B".repeat(5000), "tool-2"), // In protected zone, should NOT be pruned
      createAssistantMessage("Third"), // Protected
      createAssistantMessage("Fourth"), // Protected
    ];

    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
        keepLastAssistants: 3,
      },
    });

    // Asserted unconditionally: if pruning never fired, the protection check
    // below would prove nothing.
    expect(result.changed).toBe(true);
    expect(result.softTrimmed).toBe(1);

    // tool-1 sits before the protected zone and must have been trimmed
    expect(toolResultTextAt(result.messages, 2)).toContain("[Tool result trimmed:");
    // tool-2 sits inside the protected zone and must be untouched
    expect(toolResultTextAt(result.messages, 4)).toBe("B".repeat(5000)); // Unchanged
  });

  it("never prunes before first user message", () => {
    const messages = [
      createAssistantMessage("Bootstrap read"), // Before first user message
      createToolResultMessage("read", "A".repeat(5000), "tool-1"), // Should NOT be pruned
      createUserMessage("Hello"), // First user message
      createAssistantMessage("Response"),
      createToolResultMessage("read", "B".repeat(5000), "tool-2"), // Can be pruned
      createAssistantMessage("Done 1"),
      createAssistantMessage("Done 2"),
      createAssistantMessage("Done 3"),
    ];

    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
      },
    });

    // Pruning must actually have run for the bootstrap check to be meaningful
    expect(result.changed).toBe(true);
    expect(toolResultTextAt(result.messages, 4)).toContain("[Tool result trimmed:");

    // The first tool result (before first user message) should NOT be modified
    expect(toolResultTextAt(result.messages, 1)).toBe("A".repeat(5000)); // Unchanged - bootstrap protection
  });

  it("respects tool deny list", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Processing"),
      createToolResultMessage("read", "A".repeat(5000), "tool-1"),
      createAssistantMessage("Done 1"),
      createAssistantMessage("Done 2"),
      createAssistantMessage("Done 3"),
    ];

    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
        tools: {
          deny: ["read"], // Don't prune read tool results
        },
      },
    });

    // read tool should not be pruned
    expect(result.changed).toBe(false);
  });

  it("respects tool allow list", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Processing"),
      createToolResultMessage("read", "A".repeat(5000), "tool-1"),
      createToolResultMessage("exec", "B".repeat(5000), "tool-2"),
      createAssistantMessage("Done 1"),
      createAssistantMessage("Done 2"),
      createAssistantMessage("Done 3"),
    ];

    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
        tools: {
          allow: ["exec"], // Only prune exec tool results
        },
      },
    });

    // Asserted unconditionally so the test cannot pass without any pruning.
    expect(result.changed).toBe(true);
    expect(result.softTrimmed).toBe(1);

    // read tool should not be pruned
    expect(toolResultTextAt(result.messages, 2)).toBe("A".repeat(5000)); // Unchanged
    // exec tool is on the allow list and must have been trimmed
    expect(toolResultTextAt(result.messages, 3)).toContain("[Tool result trimmed:");
  });

  it("skips tool results with images", () => {
    const messages = [
      createUserMessage("Hello"),
      createAssistantMessage("Processing"),
      {
        role: "user",
        content: [
          {
            type: "tool_result",
            tool_use_id: "tool-1",
            name: "screenshot",
            content: [
              { type: "image", source: { type: "base64", data: "abc123" } },
              { type: "text", text: "A".repeat(5000) },
            ],
          },
        ],
      } as unknown as AgentMessage,
      createAssistantMessage("Done 1"),
      createAssistantMessage("Done 2"),
      createAssistantMessage("Done 3"),
    ];

    const result = pruneToolResults({
      messages,
      contextWindowTokens: 5_000,
      settings: {
        softTrimRatio: 0.1,
      },
    });

    // Image-containing tool result should not be pruned
    expect(result.changed).toBe(false);
    expect(result.softTrimmed).toBe(0);
    expect(result.hardCleared).toBe(0);
  });
});
|
||||
|
||||
describe("DEFAULT_TOOL_RESULT_PRUNING_SETTINGS", () => {
  // Pins the shipped defaults so an accidental change is caught in review.
  it("has expected default values", () => {
    const { softTrimRatio, hardClearRatio, keepLastAssistants, softTrim, hardClear } =
      DEFAULT_TOOL_RESULT_PRUNING_SETTINGS;

    expect(softTrimRatio).toBe(0.3);
    expect(hardClearRatio).toBe(0.5);
    expect(keepLastAssistants).toBe(3);

    expect(softTrim.maxChars).toBe(4000);
    expect(softTrim.headChars).toBe(1500);
    expect(softTrim.tailChars).toBe(1500);

    expect(hardClear.enabled).toBe(true);
  });
});
|
||||
510
src/agent/context-window/tool-result-pruning.ts
Normal file
510
src/agent/context-window/tool-result-pruning.ts
Normal file
|
|
@ -0,0 +1,510 @@
|
|||
/**
|
||||
* Tool Result Pruning
|
||||
*
|
||||
* Smart pruning of tool results to reduce context window usage while preserving
|
||||
* useful information. Implements two-phase pruning:
|
||||
*
|
||||
* 1. Soft Trim: Keep head + tail of large tool results
|
||||
* 2. Hard Clear: Replace old tool results with placeholder
|
||||
*
|
||||
* Based on OpenClaw's microcompact-style context pruning.
|
||||
*/
|
||||
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
|
||||
// ─── Types ───────────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Tuning knobs for tool result pruning.
 *
 * Ratios are compared against estimated context utilization, i.e. estimated
 * characters in the conversation divided by the character size of the
 * context window.
 */
export type ToolResultPruningSettings = {
  /**
   * Utilization at or above which soft trimming starts.
   * Default: 0.3.
   */
  softTrimRatio: number;

  /**
   * Utilization at or above which hard clearing starts.
   * Default: 0.5.
   */
  hardClearRatio: number;

  /**
   * Minimum amount of prunable tool result characters required before hard
   * clearing is considered.
   * Default: 50000.
   */
  minPrunableToolChars: number;

  /**
   * How many of the most recent assistant messages are protected from pruning.
   * Default: 3.
   */
  keepLastAssistants: number;

  /** Soft trim configuration. */
  softTrim: {
    /** A tool result longer than this is soft-trimmed. Default: 4000. */
    maxChars: number;
    /** Characters kept from the start of the result. Default: 1500. */
    headChars: number;
    /** Characters kept from the end of the result. Default: 1500. */
    tailChars: number;
  };

  /** Hard clear configuration. */
  hardClear: {
    /** Turns hard clearing on or off. Default: true. */
    enabled: boolean;
    /** Text that replaces a cleared tool result. */
    placeholder: string;
  };

  /**
   * Optional per-tool filters. A tool on `deny` is never pruned; when `allow`
   * is non-empty only the tools it lists are pruned.
   */
  tools?: {
    allow?: string[];
    deny?: string[];
  };
};
|
||||
|
||||
/**
 * Baseline pruning configuration. No per-tool allow/deny filter is set, so
 * every tool's results are eligible for pruning by default.
 */
export const DEFAULT_TOOL_RESULT_PRUNING_SETTINGS: ToolResultPruningSettings = {
  // Utilization thresholds for the two pruning phases.
  softTrimRatio: 0.3,
  hardClearRatio: 0.5,

  // Hard clear is skipped unless at least this much prunable text exists.
  minPrunableToolChars: 50000,

  // The most recent assistant messages are left untouched.
  keepLastAssistants: 3,

  softTrim: { maxChars: 4000, headChars: 1500, tailChars: 1500 },

  hardClear: {
    enabled: true,
    placeholder: "[Tool result cleared to save context space]",
  },
};
|
||||
|
||||
/** Outcome of a `pruneToolResults` call. */
export type ToolResultPruningResult = {
  /** The message list after pruning. */
  messages: AgentMessage[];

  /** True when at least one message was modified. */
  changed: boolean;

  /** Count of soft-trim modifications (incremented once per modified message). */
  softTrimmed: number;

  /** Count of hard-clear modifications (incremented once per modified message). */
  hardCleared: number;

  /** Estimated number of characters removed from the context. */
  charsSaved: number;
};
|
||||
|
||||
// ─── Constants ───────────────────────────────────────────────────────────────
|
||||
|
||||
/** Characters assumed per token when converting a token window into characters. */
const CHARS_PER_TOKEN_ESTIMATE = 4;

/** Characters charged for each image block when estimating message size. */
const IMAGE_CHAR_ESTIMATE = 8000;
|
||||
|
||||
// ─── Helper Functions ────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Collects the textual part of a tool result's `content`.
 *
 * - A string is returned as-is.
 * - For an array, string entries and the `text` of object entries whose
 *   `text` is a string are joined with a newline; everything else is ignored.
 * - Any other value yields an empty string.
 */
function extractToolResultText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces = content.flatMap((entry): string[] => {
    if (typeof entry === "string") return [entry];
    const hasStringText =
      entry !== null &&
      typeof entry === "object" &&
      "text" in entry &&
      typeof entry.text === "string";
    return hasStringText ? [entry.text] : [];
  });

  return pieces.join("\n");
}
|
||||
|
||||
/**
 * Reports whether `content` is an array holding at least one object block
 * whose `type` is "image". Non-array content never counts as having images.
 */
function hasImageContent(content: unknown): boolean {
  return (
    Array.isArray(content) &&
    content.some(
      (entry) =>
        entry !== null &&
        typeof entry === "object" &&
        "type" in entry &&
        entry.type === "image",
    )
  );
}
|
||||
|
||||
/**
 * Approximates how many characters a message contributes to the context.
 *
 * - Roles other than "user" and "assistant" are charged a flat 256.
 * - String content counts its length; non-array, non-string content counts 0.
 * - User blocks: text length, extracted tool result text length, or a fixed
 *   estimate per image block.
 * - Assistant blocks: text length, thinking length, or the JSON length of a
 *   tool call's arguments (128 when they cannot be serialized).
 */
function estimateMessageChars(message: AgentMessage): number {
  const { role } = message;
  if (role !== "user" && role !== "assistant") return 256;

  const body = (message as any).content;
  if (typeof body === "string") return body.length;
  if (!Array.isArray(body)) return 0;

  let total = 0;
  for (const part of body) {
    if (typeof part === "string") {
      total += part.length;
      continue;
    }
    if (!part || typeof part !== "object") continue;

    if (role === "user") {
      switch (part.type) {
        case "text":
          if (typeof part.text === "string") total += part.text.length;
          break;
        case "tool_result":
          total += extractToolResultText(part.content).length;
          break;
        case "image":
          total += IMAGE_CHAR_ESTIMATE;
          break;
      }
      continue;
    }

    // Assistant message blocks.
    switch (part.type) {
      case "text":
        if (typeof part.text === "string") total += part.text.length;
        break;
      case "thinking":
        if (typeof part.thinking === "string") total += part.thinking.length;
        break;
      case "toolCall":
      case "tool_use":
        try {
          total += JSON.stringify(part.arguments ?? part.input ?? {}).length;
        } catch {
          // Arguments that cannot be serialized get a fixed charge.
          total += 128;
        }
        break;
    }
  }
  return total;
}
|
||||
|
||||
/**
 * Sums the per-message character estimates over the whole conversation.
 */
function estimateContextChars(messages: AgentMessage[]): number {
  let total = 0;
  messages.forEach((entry) => {
    total += estimateMessageChars(entry);
  });
  return total;
}
|
||||
|
||||
/**
 * Locates the start of the protected tail of the conversation.
 *
 * Walks backwards and returns the index of the Nth assistant message counted
 * from the end, where N is `keepLastAssistants`. Callers treat messages from
 * that index onward as protected.
 *
 * @param messages - Conversation in chronological order.
 * @param keepLastAssistants - Number of trailing assistant messages to
 *   protect. Fractional values are rounded up, so they still resolve to a
 *   cutoff rather than never matching.
 * @returns `messages.length` when `keepLastAssistants` is zero or negative
 *   (nothing is protected), the cutoff index when enough assistant messages
 *   exist, or `null` when there are fewer assistant messages than requested
 *   (also for NaN / Infinity).
 */
function findAssistantCutoffIndex(
  messages: AgentMessage[],
  keepLastAssistants: number,
): number | null {
  if (keepLastAssistants <= 0) return messages.length;

  // Round up so a value like 1.5 counts down to exactly zero; an exact-zero
  // test on a fractional counter would never succeed.
  let remaining = Math.ceil(keepLastAssistants);
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i]?.role !== "assistant") continue;
    remaining--;
    if (remaining <= 0) return i;
  }

  return null;
}
|
||||
|
||||
/**
 * Tells genuine user input apart from tool results, which also arrive with
 * the "user" role.
 *
 * A user message is "real" unless its content is an array that contains no
 * string entry and no object block of a type other than "tool_result" (an
 * empty array therefore is not real). Non-user roles are never real.
 */
function isRealUserMessage(message: AgentMessage): boolean {
  if (message.role !== "user") return false;

  const body = (message as any).content;
  // Plain strings, and any other non-array payload, count as real input.
  if (!Array.isArray(body)) return true;

  return body.some(
    (part) =>
      typeof part === "string" ||
      (part !== null && typeof part === "object" && part.type !== "tool_result"),
  );
}
|
||||
|
||||
/**
 * Returns the position of the earliest message that is real user input
 * (as opposed to a tool-result-only user message), or `null` when there is
 * none. Pruning uses this as its lower bound so nothing before the first
 * real user input is touched.
 */
function findFirstUserIndex(messages: AgentMessage[]): number | null {
  const position = messages.findIndex(
    (candidate) => Boolean(candidate) && isRealUserMessage(candidate),
  );
  return position === -1 ? null : position;
}
|
||||
|
||||
/**
 * Decides whether results of `toolName` may be pruned.
 *
 * With no `tools` filter every tool is prunable. The deny list wins over the
 * allow list; a non-empty allow list restricts pruning to the tools it names.
 */
function isToolPrunable(toolName: string, settings: ToolResultPruningSettings): boolean {
  const filter = settings.tools;
  if (!filter) return true;

  const denied = filter.deny?.includes(toolName) ?? false;
  if (denied) return false;

  const allowList = filter.allow ?? [];
  // An absent or empty allow list places no restriction.
  return allowList.length === 0 || allowList.includes(toolName);
}
|
||||
|
||||
/**
 * Returns at most the first `maxChars` UTF-16 code units of `text`.
 *
 * The cut never falls inside a surrogate pair: if it would, the head is
 * shortened by one code unit so the result does not end in a lone high
 * surrogate.
 *
 * @param text - Source text.
 * @param maxChars - Upper bound on the returned length; values <= 0 yield "".
 * @returns The whole text when it already fits, otherwise its leading part.
 */
function takeHead(text: string, maxChars: number): string {
  if (maxChars <= 0) return "";
  if (text.length <= maxChars) return text;

  // slice() truncates fractional indexes; do the same so the code units
  // inspected below are the ones next to the actual cut.
  let end = Math.trunc(maxChars);
  if (end > 0) {
    const last = text.charCodeAt(end - 1);
    const next = text.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    if (splitsPair) end -= 1;
  }
  return text.slice(0, end);
}
|
||||
|
||||
/**
 * Returns at most the last `maxChars` UTF-16 code units of `text`.
 *
 * The cut never falls inside a surrogate pair: if it would, the tail is
 * shortened by one code unit so the result does not start with a lone low
 * surrogate.
 *
 * @param text - Source text.
 * @param maxChars - Upper bound on the returned length; values <= 0 yield "".
 * @returns The whole text when it already fits, otherwise its trailing part.
 */
function takeTail(text: string, maxChars: number): string {
  if (maxChars <= 0) return "";
  if (text.length <= maxChars) return text;

  // slice() truncates fractional indexes; do the same so the code units
  // inspected below are the ones next to the actual cut.
  let start = Math.trunc(text.length - maxChars);
  if (start > 0) {
    const prev = text.charCodeAt(start - 1);
    const first = text.charCodeAt(start);
    const splitsPair = prev >= 0xd800 && prev <= 0xdbff && first >= 0xdc00 && first <= 0xdfff;
    if (splitsPair) start += 1;
  }
  return text.slice(start);
}
|
||||
|
||||
/**
 * Soft-trims a tool result: keeps the head and tail of `text`, joined by a
 * "..." marker, and appends a note describing what was kept.
 *
 * @param text - Full tool result text.
 * @param settings - Pruning settings; only `softTrim` is read.
 * @returns The trimmed text and the number of characters saved, or `null`
 *   when the text should be left alone: it is no longer than
 *   `softTrim.maxChars`, head + tail already cover it, or the trimmed form
 *   (including marker and note) would not be shorter than the original.
 */
function softTrimText(
  text: string,
  settings: ToolResultPruningSettings,
): { trimmed: string; saved: number } | null {
  const { maxChars, headChars, tailChars } = settings.softTrim;

  if (text.length <= maxChars) return null;
  if (headChars + tailChars >= text.length) return null;

  const head = takeHead(text, headChars);
  const tail = takeTail(text, tailChars);
  const note = `\n\n[Tool result trimmed: kept first ${headChars} chars and last ${tailChars} chars of ${text.length} chars.]`;
  const trimmed = `${head}\n...\n${tail}${note}`;

  // The marker and note add overhead. With a small maxChars relative to
  // head + tail the "trimmed" text can end up longer than the input; in that
  // case trimming is pointless and would report negative savings.
  const saved = text.length - trimmed.length;
  if (saved <= 0) return null;

  return { trimmed, saved };
}
|
||||
|
||||
/**
 * Applies one pruning pass to the `tool_result` blocks of a user message.
 *
 * Blocks that are not tool results, belong to a non-prunable tool, or contain
 * images are passed through untouched. In "soft" mode eligible results are
 * head/tail-trimmed; in "hard" mode they are replaced by the configured
 * placeholder, but only when that actually shrinks the result. Skipping
 * results no longer than the placeholder keeps the operation idempotent
 * (already-cleared results are not reported as changed again) and avoids
 * growing tiny results.
 *
 * @param message - Message to process; never mutated.
 * @param settings - Fully resolved pruning settings.
 * @param mode - "soft" for trimming, "hard" for clearing.
 * @returns The original message with `changed: false` when nothing was
 *   modified, otherwise a copy with rewritten content and the total number
 *   of characters saved.
 */
function processUserMessageToolResults(
  message: AgentMessage,
  settings: ToolResultPruningSettings,
  mode: "soft" | "hard",
): { message: AgentMessage; changed: boolean; charsSaved: number } {
  const msgAny = message as any;
  const content = msgAny.content;

  if (!Array.isArray(content)) {
    return { message, changed: false, charsSaved: 0 };
  }

  let changed = false;
  let charsSaved = 0;
  const newContent: any[] = [];

  for (const block of content) {
    if (!block || typeof block !== "object" || block.type !== "tool_result") {
      newContent.push(block);
      continue;
    }

    const toolName = block.name ?? "unknown";

    // Skip non-prunable tools
    if (!isToolPrunable(toolName, settings)) {
      newContent.push(block);
      continue;
    }

    // Skip image-containing tool results
    if (hasImageContent(block.content)) {
      newContent.push(block);
      continue;
    }

    const originalText = extractToolResultText(block.content);

    if (mode === "soft") {
      const result = softTrimText(originalText, settings);
      if (!result) {
        newContent.push(block);
        continue;
      }
      newContent.push({
        ...block,
        content: [{ type: "text", text: result.trimmed }],
      });
      changed = true;
      charsSaved += result.saved;
      continue;
    }

    // Hard clear: only worthwhile when the placeholder is shorter than the
    // text it replaces.
    const placeholder = settings.hardClear.placeholder;
    const saved = originalText.length - placeholder.length;
    if (saved <= 0) {
      newContent.push(block);
      continue;
    }
    newContent.push({
      ...block,
      content: [{ type: "text", text: placeholder }],
    });
    changed = true;
    charsSaved += saved;
  }

  if (!changed) {
    return { message, changed: false, charsSaved: 0 };
  }

  return {
    message: { ...message, content: newContent } as AgentMessage,
    changed: true,
    charsSaved,
  };
}
|
||||
|
||||
// ─── Main Functions ──────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Prune tool results in messages to reduce context window usage.
 *
 * Two-phase approach:
 * 1. Soft Trim (utilization >= softTrimRatio): keep head + tail of large
 *    tool results.
 * 2. Hard Clear (utilization still >= hardClearRatio after phase 1, hard
 *    clear enabled, and enough prunable text present): replace tool results
 *    with a placeholder, oldest first, stopping as soon as utilization drops
 *    below hardClearRatio.
 *
 * Protections:
 * - Never prunes before the first real user message (bootstrap reads).
 * - Never prunes at or after the Nth-from-last assistant message
 *   (`keepLastAssistants`); if fewer assistant messages exist, nothing is
 *   pruned at all.
 * - Skips image-containing tool results.
 * - Respects tool allow/deny lists.
 *
 * @param params.messages - Conversation in chronological order; not mutated.
 * @param params.contextWindowTokens - Context window size in tokens. A value
 *   that is not a positive number (zero, negative, NaN) disables pruning.
 * @param params.settings - Overrides merged over the defaults (`softTrim`
 *   and `hardClear` are merged field by field).
 * @returns The pruning outcome. When nothing changed, `messages` is the very
 *   array that was passed in.
 */
export function pruneToolResults(params: {
  messages: AgentMessage[];
  contextWindowTokens: number;
  settings?: Partial<ToolResultPruningSettings>;
}): ToolResultPruningResult {
  const { messages, contextWindowTokens } = params;
  const settings: ToolResultPruningSettings = {
    ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS,
    ...params.settings,
    softTrim: {
      ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim,
      ...params.settings?.softTrim,
    },
    hardClear: {
      ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.hardClear,
      ...params.settings?.hardClear,
    },
  };

  const unchanged: ToolResultPruningResult = {
    messages,
    changed: false,
    softTrimmed: 0,
    hardCleared: 0,
    charsSaved: 0,
  };

  const charWindow = contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE;
  // Written as a negated `>` so NaN is rejected too; with a NaN window every
  // ratio comparison below would be false and pruning would always run.
  if (!(charWindow > 0)) return unchanged;

  // Find cutoff index for protected assistant messages
  const cutoffIndex = findAssistantCutoffIndex(messages, settings.keepLastAssistants);
  if (cutoffIndex === null) return unchanged;

  // Never prune before first user message (bootstrap protection)
  const pruneStartIndex = findFirstUserIndex(messages) ?? messages.length;

  // Calculate current utilization
  let totalChars = estimateContextChars(messages);
  let ratio = totalChars / charWindow;

  // No pruning needed
  if (ratio < settings.softTrimRatio) return unchanged;

  const result = messages.slice();
  let changed = false;
  let softTrimmed = 0;
  let hardCleared = 0;
  let charsSaved = 0;

  // Indexes of messages in the prunable range that carry tool results
  const prunableIndexes: number[] = [];

  // Phase 1: Soft Trim
  for (let i = pruneStartIndex; i < cutoffIndex; i++) {
    const msg = result[i];
    if (!msg || msg.role !== "user") continue;

    const msgContent = (msg as any).content;
    if (!Array.isArray(msgContent)) continue;

    const hasToolResult = msgContent.some(
      (b: any) => b && typeof b === "object" && b.type === "tool_result",
    );
    if (!hasToolResult) continue;

    prunableIndexes.push(i);

    const processed = processUserMessageToolResults(msg, settings, "soft");
    if (processed.changed) {
      result[i] = processed.message;
      changed = true;
      softTrimmed++;
      charsSaved += processed.charsSaved;
      totalChars -= processed.charsSaved;
    }
  }

  // Recalculate ratio after soft trim
  ratio = totalChars / charWindow;

  // Phase 2: Hard Clear (if needed)
  if (ratio >= settings.hardClearRatio && settings.hardClear.enabled) {
    // Check if we have enough prunable content to make hard clear worthwhile
    let prunableChars = 0;
    for (const i of prunableIndexes) {
      const msg = result[i];
      if (msg) prunableChars += estimateMessageChars(msg);
    }

    if (prunableChars >= settings.minPrunableToolChars) {
      for (const i of prunableIndexes) {
        // Stop as soon as utilization is back under the threshold
        if (ratio < settings.hardClearRatio) break;

        const msg = result[i];
        if (!msg) continue;

        const processed = processUserMessageToolResults(msg, settings, "hard");
        if (processed.changed) {
          result[i] = processed.message;
          changed = true;
          hardCleared++;
          charsSaved += processed.charsSaved;
          totalChars -= processed.charsSaved;
          ratio = totalChars / charWindow;
        }
      }
    }
  }

  // Hand back the caller's own array when nothing was modified, consistent
  // with the early-return paths above.
  if (!changed) return unchanged;

  return {
    messages: result,
    changed,
    softTrimmed,
    hardCleared,
    charsSaved,
  };
}
|
||||
|
|
@ -23,7 +23,7 @@ export type CompactionEndEvent = {
|
|||
kept: number;
|
||||
tokensRemoved?: number;
|
||||
tokensKept?: number;
|
||||
reason: "count" | "tokens" | "summary";
|
||||
reason: "count" | "tokens" | "summary" | "pruning";
|
||||
};
|
||||
|
||||
/** Union of all Multica-specific events */
|
||||
|
|
|
|||
|
|
@ -292,10 +292,10 @@ export class ProfileManager {
|
|||
updateStyle(style: string): void {
|
||||
const profile = this.getOrCreateProfile(false);
|
||||
const currentConfig = profile.config ?? {};
|
||||
const newConfig: ProfileConfig = {
|
||||
...currentConfig,
|
||||
// Use Object.assign to avoid exactOptionalPropertyTypes issues with spread
|
||||
const newConfig: ProfileConfig = Object.assign({}, currentConfig, {
|
||||
style: style as ProfileConfig["style"],
|
||||
};
|
||||
});
|
||||
profile.config = newConfig;
|
||||
this.profile = profile;
|
||||
writeProfileConfig(this.profileId, newConfig, { baseDir: this.baseDir });
|
||||
|
|
|
|||
|
|
@ -19,7 +19,8 @@ export type CompactionResult = {
|
|||
tokensKept?: number | undefined;
|
||||
/** Summary generated in summary mode */
|
||||
summary?: string | undefined;
|
||||
reason: "count" | "tokens" | "summary";
|
||||
/** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */
|
||||
reason: "count" | "tokens" | "summary" | "pruning";
|
||||
};
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -2,11 +2,15 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
|||
import { getModel, type Model } from "@mariozechner/pi-ai";
|
||||
import type { SessionEntry, SessionMeta } from "./types.js";
|
||||
import { appendEntry, readEntries, resolveSessionPath, writeEntries } from "./storage.js";
|
||||
import { compactMessages, compactMessagesAsync } from "./compaction.js";
|
||||
import { compactMessages, compactMessagesAsync, type CompactionResult } from "./compaction.js";
|
||||
import { estimateTokenUsage, shouldCompact as shouldCompactTokens } from "../context-window/index.js";
|
||||
import { credentialManager } from "../credentials.js";
|
||||
import { repairSessionFileIfNeeded, type RepairReport } from "./session-file-repair.js";
|
||||
import { sanitizeToolCallInputs, sanitizeToolUseResultPairing } from "./session-transcript-repair.js";
|
||||
import {
|
||||
pruneToolResults,
|
||||
type ToolResultPruningSettings,
|
||||
} from "../context-window/tool-result-pruning.js";
|
||||
|
||||
/** Get Kimi model for summarization (use a cheaper model than k2-thinking) */
|
||||
function getSummaryModel(): Model<any> {
|
||||
|
|
@ -54,6 +58,12 @@ export type SessionManagerOptions = {
|
|||
apiKey?: string | undefined;
|
||||
/** Custom summary instructions */
|
||||
customInstructions?: string | undefined;
|
||||
|
||||
// Tool result pruning
|
||||
/** Whether to enable tool result pruning before compaction (default: true in tokens/summary mode) */
|
||||
enableToolResultPruning?: boolean | undefined;
|
||||
/** Tool result pruning settings */
|
||||
toolResultPruning?: Partial<ToolResultPruningSettings> | undefined;
|
||||
};
|
||||
|
||||
export class SessionManager {
|
||||
|
|
@ -74,6 +84,9 @@ export class SessionManager {
|
|||
private apiKey: string | undefined;
|
||||
private readonly customInstructions: string | undefined;
|
||||
private previousSummary: string | undefined;
|
||||
// Tool result pruning
|
||||
private readonly enableToolResultPruning: boolean;
|
||||
private readonly toolResultPruning: Partial<ToolResultPruningSettings> | undefined;
|
||||
|
||||
private queue: Promise<void> = Promise.resolve();
|
||||
private meta: SessionMeta | undefined;
|
||||
|
|
@ -101,6 +114,12 @@ export class SessionManager {
|
|||
this.apiKey = options.apiKey;
|
||||
this.customInstructions = options.customInstructions;
|
||||
|
||||
// Tool result pruning (enabled by default in tokens/summary mode)
|
||||
this.enableToolResultPruning =
|
||||
options.enableToolResultPruning ??
|
||||
(this.compactionMode === "tokens" || this.compactionMode === "summary");
|
||||
this.toolResultPruning = options.toolResultPruning;
|
||||
|
||||
this.meta = this.loadMeta();
|
||||
}
|
||||
|
||||
|
|
@ -209,7 +228,33 @@ export class SessionManager {
|
|||
return shouldCompactTokens(estimation);
|
||||
}
|
||||
|
||||
async maybeCompact(messages: AgentMessage[]) {
|
||||
async maybeCompact(messages: AgentMessage[]): Promise<CompactionResult | null> {
|
||||
let workingMessages = messages;
|
||||
let toolResultPruningApplied = false;
|
||||
|
||||
// Phase 1: Tool result pruning (soft trim / hard clear)
|
||||
// This reduces token usage without removing messages
|
||||
if (this.enableToolResultPruning) {
|
||||
const pruneResult = pruneToolResults({
|
||||
messages: workingMessages,
|
||||
contextWindowTokens: this.contextWindowTokens,
|
||||
settings: this.toolResultPruning,
|
||||
});
|
||||
|
||||
if (pruneResult.changed) {
|
||||
workingMessages = pruneResult.messages;
|
||||
toolResultPruningApplied = true;
|
||||
// Log pruning stats
|
||||
if (pruneResult.softTrimmed > 0 || pruneResult.hardCleared > 0) {
|
||||
console.error(
|
||||
`[SessionManager] Tool result pruning: ${pruneResult.softTrimmed} soft-trimmed, ` +
|
||||
`${pruneResult.hardCleared} hard-cleared, ~${Math.round(pruneResult.charsSaved / 1000)}k chars saved`,
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Phase 2: Message compaction (remove old messages if still needed)
|
||||
let result;
|
||||
|
||||
if (this.compactionMode === "summary") {
|
||||
|
|
@ -219,7 +264,7 @@ export class SessionManager {
|
|||
|
||||
if (!apiKey) {
|
||||
// No API key available, downgrade to tokens mode
|
||||
result = compactMessages(messages, {
|
||||
result = compactMessages(workingMessages, {
|
||||
mode: "tokens",
|
||||
contextWindowTokens: this.contextWindowTokens,
|
||||
systemPrompt: this.systemPrompt,
|
||||
|
|
@ -228,7 +273,7 @@ export class SessionManager {
|
|||
minKeepMessages: this.minKeepMessages,
|
||||
});
|
||||
} else {
|
||||
result = await compactMessagesAsync(messages, {
|
||||
result = await compactMessagesAsync(workingMessages, {
|
||||
mode: "summary",
|
||||
model,
|
||||
apiKey,
|
||||
|
|
@ -247,7 +292,7 @@ export class SessionManager {
|
|||
}
|
||||
}
|
||||
} else {
|
||||
result = compactMessages(messages, {
|
||||
result = compactMessages(workingMessages, {
|
||||
mode: this.compactionMode,
|
||||
// Count mode parameters
|
||||
maxMessages: this.maxMessages,
|
||||
|
|
@ -261,7 +306,14 @@ export class SessionManager {
|
|||
});
|
||||
}
|
||||
|
||||
if (!result) return null;
|
||||
// If no message compaction needed but tool result pruning was applied,
|
||||
// still return the pruned messages
|
||||
if (!result) {
|
||||
if (toolResultPruningApplied) {
|
||||
return { kept: workingMessages, removedCount: 0, reason: "pruning" as const };
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
const entries: SessionEntry[] = [];
|
||||
if (this.meta) {
|
||||
|
|
|
|||
|
|
@ -23,5 +23,5 @@ export type SessionEntry =
|
|||
tokensKept?: number | undefined;
|
||||
/** 摘要模式生成的摘要 */
|
||||
summary?: string | undefined;
|
||||
reason?: "count" | "tokens" | "summary" | undefined;
|
||||
reason?: "count" | "tokens" | "summary" | "pruning" | undefined;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -1,5 +1,6 @@
|
|||
{
|
||||
"$schema": "https://turbo.build/schema.json",
|
||||
"globalDependencies": ["src/**"],
|
||||
"tasks": {
|
||||
"build": {
|
||||
"dependsOn": ["^build"],
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue