From 3dd7ff52d67c007667c50a6271255980fdf57431 Mon Sep 17 00:00:00 2001 From: Jiang Bohan Date: Thu, 5 Feb 2026 15:50:58 +0800 Subject: [PATCH 1/7] feat(context): add tool result pruning for smarter context management Two-phase pruning approach based on OpenClaw's microcompact-style: - Soft Trim (30% utilization): Keep head 1500 + tail 1500 chars of large tool results - Hard Clear (50% utilization): Replace old tool results with placeholder Protections: - Never prunes before first real user message (bootstrap protection) - Protects last 3 assistant messages and their corresponding tool results - Skips image-containing tool results - Respects tool allow/deny lists Enabled by default in tokens/summary compaction modes. --- src/agent/context-window/index.ts | 10 + .../tool-result-pruning.test.ts | 285 ++++++++++ .../context-window/tool-result-pruning.ts | 510 ++++++++++++++++++ src/agent/session/compaction.ts | 3 +- src/agent/session/session-manager.ts | 60 ++- 5 files changed, 863 insertions(+), 5 deletions(-) create mode 100644 src/agent/context-window/tool-result-pruning.test.ts create mode 100644 src/agent/context-window/tool-result-pruning.ts diff --git a/src/agent/context-window/index.ts b/src/agent/context-window/index.ts index ec57f696..440d6b28 100644 --- a/src/agent/context-window/index.ts +++ b/src/agent/context-window/index.ts @@ -44,3 +44,13 @@ export { compactMessagesWithSummary, compactMessagesWithChunkedSummary, } from "./summarization.js"; + +// Tool result pruning +export type { + ToolResultPruningSettings, + ToolResultPruningResult, +} from "./tool-result-pruning.js"; +export { + DEFAULT_TOOL_RESULT_PRUNING_SETTINGS, + pruneToolResults, +} from "./tool-result-pruning.js"; diff --git a/src/agent/context-window/tool-result-pruning.test.ts b/src/agent/context-window/tool-result-pruning.test.ts new file mode 100644 index 00000000..d2677c2a --- /dev/null +++ b/src/agent/context-window/tool-result-pruning.test.ts @@ -0,0 +1,285 @@ +import { describe, it, 
expect } from "vitest"; +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { pruneToolResults, DEFAULT_TOOL_RESULT_PRUNING_SETTINGS } from "./tool-result-pruning.js"; + +// Helper to create a user message with tool result +function createToolResultMessage( + toolName: string, + content: string, + toolUseId: string = "tool-123", +): AgentMessage { + return { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: toolUseId, + name: toolName, + content: [{ type: "text", text: content }], + }, + ], + } as unknown as AgentMessage; +} + +// Helper to create an assistant message +function createAssistantMessage(text: string): AgentMessage { + return { + role: "assistant", + content: [{ type: "text", text }], + } as unknown as AgentMessage; +} + +// Helper to create a user message +function createUserMessage(text: string): AgentMessage { + return { + role: "user", + content: text, + } as unknown as AgentMessage; +} + +describe("pruneToolResults", () => { + it("returns unchanged if utilization is below softTrimRatio", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Hi there!"), + createToolResultMessage("read", "Short content"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 200_000, // Very large window + }); + + expect(result.changed).toBe(false); + expect(result.messages).toBe(messages); + expect(result.softTrimmed).toBe(0); + expect(result.hardCleared).toBe(0); + }); + + it("soft trims large tool results", () => { + // Create a message with a large tool result (5000 chars) + const largeContent = "A".repeat(5000); + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Processing..."), + createToolResultMessage("read", largeContent), + createAssistantMessage("Done!"), + createAssistantMessage("Follow up"), + createAssistantMessage("Another one"), + createAssistantMessage("Protected message"), // This is protected (keepLastAssistants=3) + ]; + + 
const result = pruneToolResults({ + messages, + contextWindowTokens: 10_000, // Small window to trigger pruning + settings: { + softTrimRatio: 0.1, // Low threshold to ensure pruning + }, + }); + + expect(result.changed).toBe(true); + expect(result.softTrimmed).toBe(1); + + // Check that the trimmed message contains head + tail + const trimmedMsg = result.messages[2] as any; + const trimmedText = trimmedMsg.content[0].content[0].text; + expect(trimmedText).toContain("A".repeat(100)); // Should have some head content + expect(trimmedText).toContain("..."); // Truncation marker + expect(trimmedText).toContain("[Tool result trimmed:"); + }); + + it("hard clears when utilization exceeds hardClearRatio", () => { + // Create multiple messages with large tool results + const largeContent = "X".repeat(10000); + const messages = [ + createUserMessage("Start"), + createAssistantMessage("Processing 1"), + createToolResultMessage("read", largeContent, "tool-1"), + createAssistantMessage("Processing 2"), + createToolResultMessage("exec", largeContent, "tool-2"), + createAssistantMessage("Processing 3"), + createToolResultMessage("glob", largeContent, "tool-3"), + createAssistantMessage("Done 1"), // Protected + createAssistantMessage("Done 2"), // Protected + createAssistantMessage("Done 3"), // Protected + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, // Very small window + settings: { + softTrimRatio: 0.1, + hardClearRatio: 0.2, + minPrunableToolChars: 1000, // Lower threshold for test + hardClear: { + enabled: true, + placeholder: "[Cleared]", + }, + }, + }); + + expect(result.changed).toBe(true); + // Should have cleared at least some tool results + expect(result.hardCleared).toBeGreaterThan(0); + expect(result.charsSaved).toBeGreaterThan(0); + }); + + it("protects last N assistant messages", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("First"), + createToolResultMessage("read", "A".repeat(5000), 
"tool-1"), // Should be prunable + createAssistantMessage("Second"), // Protected (keepLastAssistants=3) + createToolResultMessage("read", "B".repeat(5000), "tool-2"), // In protected zone, should NOT be pruned + createAssistantMessage("Third"), // Protected + createAssistantMessage("Fourth"), // Protected + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + keepLastAssistants: 3, + }, + }); + + // The first tool result (before protected zone) may be pruned + // But the second one (after "Second" assistant which is in protected zone) should not be + if (result.changed) { + // Check that tool-2 result is NOT modified (it's in protected zone) + const tool2Msg = result.messages[4] as any; + const tool2Content = tool2Msg.content[0].content[0].text; + expect(tool2Content).toBe("B".repeat(5000)); // Unchanged + } + }); + + it("never prunes before first user message", () => { + const messages = [ + createAssistantMessage("Bootstrap read"), // Before first user message + createToolResultMessage("read", "A".repeat(5000), "tool-1"), // Should NOT be pruned + createUserMessage("Hello"), // First user message + createAssistantMessage("Response"), + createToolResultMessage("read", "B".repeat(5000), "tool-2"), // Can be pruned + createAssistantMessage("Done 1"), + createAssistantMessage("Done 2"), + createAssistantMessage("Done 3"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + }, + }); + + // The first tool result (before first user message) should NOT be modified + const tool1Msg = result.messages[1] as any; + const tool1Content = tool1Msg.content[0].content[0].text; + expect(tool1Content).toBe("A".repeat(5000)); // Unchanged - bootstrap protection + }); + + it("respects tool deny list", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Processing"), + createToolResultMessage("read", 
"A".repeat(5000), "tool-1"), + createAssistantMessage("Done 1"), + createAssistantMessage("Done 2"), + createAssistantMessage("Done 3"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + tools: { + deny: ["read"], // Don't prune read tool results + }, + }, + }); + + // read tool should not be pruned + expect(result.changed).toBe(false); + }); + + it("respects tool allow list", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Processing"), + createToolResultMessage("read", "A".repeat(5000), "tool-1"), + createToolResultMessage("exec", "B".repeat(5000), "tool-2"), + createAssistantMessage("Done 1"), + createAssistantMessage("Done 2"), + createAssistantMessage("Done 3"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + tools: { + allow: ["exec"], // Only prune exec tool results + }, + }, + }); + + if (result.changed) { + // read tool should not be pruned + const tool1Msg = result.messages[2] as any; + const tool1Content = tool1Msg.content[0].content[0].text; + expect(tool1Content).toBe("A".repeat(5000)); // Unchanged + } + }); + + it("skips tool results with images", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Processing"), + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-1", + name: "screenshot", + content: [ + { type: "image", source: { type: "base64", data: "abc123" } }, + { type: "text", text: "A".repeat(5000) }, + ], + }, + ], + } as unknown as AgentMessage, + createAssistantMessage("Done 1"), + createAssistantMessage("Done 2"), + createAssistantMessage("Done 3"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + }, + }); + + // Image-containing tool result should not be pruned + expect(result.softTrimmed).toBe(0); + 
expect(result.hardCleared).toBe(0); + }); +}); + +describe("DEFAULT_TOOL_RESULT_PRUNING_SETTINGS", () => { + it("has expected default values", () => { + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrimRatio).toBe(0.3); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.hardClearRatio).toBe(0.5); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.keepLastAssistants).toBe(3); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim.maxChars).toBe(4000); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim.headChars).toBe(1500); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim.tailChars).toBe(1500); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.hardClear.enabled).toBe(true); + }); +}); diff --git a/src/agent/context-window/tool-result-pruning.ts b/src/agent/context-window/tool-result-pruning.ts new file mode 100644 index 00000000..ef7ac5f1 --- /dev/null +++ b/src/agent/context-window/tool-result-pruning.ts @@ -0,0 +1,510 @@ +/** + * Tool Result Pruning + * + * Smart pruning of tool results to reduce context window usage while preserving + * useful information. Implements two-phase pruning: + * + * 1. Soft Trim: Keep head + tail of large tool results + * 2. Hard Clear: Replace old tool results with placeholder + * + * Based on OpenClaw's microcompact-style context pruning. 
import type { AgentMessage } from "@mariozechner/pi-agent-core";

// ─── Types ───────────────────────────────────────────────────────────────────

/**
 * Tunables for the two-phase tool result pruning (soft trim, then hard clear).
 */
export type ToolResultPruningSettings = {
  /** Utilization ratio to start soft trimming (default: 0.3) */
  softTrimRatio: number;
  /** Utilization ratio to start hard clearing (default: 0.5) */
  hardClearRatio: number;
  /** Minimum prunable tool result chars to consider hard clear (default: 50000) */
  minPrunableToolChars: number;
  /** Number of recent assistant messages to protect from pruning (default: 3) */
  keepLastAssistants: number;
  /** Soft trim settings */
  softTrim: {
    /** Max chars before triggering soft trim (default: 4000) */
    maxChars: number;
    /** Chars to keep from start (default: 1500) */
    headChars: number;
    /** Chars to keep from end (default: 1500) */
    tailChars: number;
  };
  /** Hard clear settings */
  hardClear: {
    /** Whether hard clear is enabled (default: true) */
    enabled: boolean;
    /** Placeholder text for cleared results */
    placeholder: string;
  };
  /** Tool names to allow/deny pruning */
  tools?: {
    allow?: string[];
    deny?: string[];
  };
};

/** Defaults applied to every setting the caller does not override. */
export const DEFAULT_TOOL_RESULT_PRUNING_SETTINGS: ToolResultPruningSettings = {
  softTrimRatio: 0.3,
  hardClearRatio: 0.5,
  minPrunableToolChars: 50_000,
  keepLastAssistants: 3,
  softTrim: {
    maxChars: 4_000,
    headChars: 1_500,
    tailChars: 1_500,
  },
  hardClear: {
    enabled: true,
    placeholder: "[Tool result cleared to save context space]",
  },
};

/** Outcome of a pruning pass. */
export type ToolResultPruningResult = {
  /** Pruned messages */
  messages: AgentMessage[];
  /** Whether any changes were made */
  changed: boolean;
  /** Number of soft-trimmed results */
  softTrimmed: number;
  /** Number of hard-cleared results */
  hardCleared: number;
  /** Estimated chars saved */
  charsSaved: number;
};

// ─── Constants ───────────────────────────────────────────────────────────────

/** Rough chars-per-token factor used to turn a token window into a char budget. */
const CHARS_PER_TOKEN_ESTIMATE = 4;
/** Flat char cost charged for an image block in a user message. */
const IMAGE_CHAR_ESTIMATE = 8_000;

// ─── Helper Functions ────────────────────────────────────────────────────────

/** A content block of unknown shape; message content is treated as untyped data. */
type ContentBlock = Record<string, unknown>;

/** Narrow an arbitrary value to a non-null object so its fields can be probed. */
function isContentBlock(value: unknown): value is ContentBlock {
  return typeof value === "object" && value !== null;
}

/** Read `content` off a message without assuming which message variant it is. */
function contentOf(message: AgentMessage): unknown {
  return (message as { content?: unknown }).content;
}

/**
 * Extract text content from a tool result content block.
 *
 * @param content - A string, or an array of strings / objects carrying a string `text`.
 * @returns The text parts joined with "\n"; "" for any other shape.
 */
function extractToolResultText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const blocks: unknown[] = content;
  const parts: string[] = [];
  for (const block of blocks) {
    if (typeof block === "string") {
      parts.push(block);
    } else if (isContentBlock(block) && typeof block.text === "string") {
      parts.push(block.text);
    }
  }
  return parts.join("\n");
}

/**
 * Check if content contains images.
 *
 * @returns true when `content` is an array holding at least one `{ type: "image" }` block.
 */
function hasImageContent(content: unknown): boolean {
  if (!Array.isArray(content)) return false;
  const blocks: unknown[] = content;
  return blocks.some((block) => isContentBlock(block) && block.type === "image");
}

/** Char cost of one object block inside a user message. */
function estimateUserBlockChars(block: ContentBlock): number {
  if (block.type === "text" && typeof block.text === "string") return block.text.length;
  if (block.type === "tool_result") return extractToolResultText(block.content).length;
  if (block.type === "image") return IMAGE_CHAR_ESTIMATE;
  return 0;
}

/** Char cost of one object block inside an assistant message. */
function estimateAssistantBlockChars(block: ContentBlock): number {
  if (block.type === "text" && typeof block.text === "string") return block.text.length;
  if (block.type === "thinking" && typeof block.thinking === "string") {
    return block.thinking.length;
  }
  if (block.type === "toolCall" || block.type === "tool_use") {
    try {
      // Throws for unserializable arguments (cycles, or a value JSON.stringify
      // maps to undefined); those fall back to a flat estimate.
      return JSON.stringify(block.arguments ?? block.input ?? {}).length;
    } catch {
      return 128;
    }
  }
  return 0;
}

/**
 * Estimate character count for a message.
 *
 * User and assistant messages are measured block by block; a message with any
 * other role is charged a flat 256 chars.
 *
 * NOTE(review): tool results are only recognised as `tool_result` blocks inside
 * user messages. If the agent core stores them under a dedicated role they hit
 * the 256 fallback and are never pruned — confirm against the message types.
 */
function estimateMessageChars(message: AgentMessage): number {
  const { role } = message;
  if (role !== "user" && role !== "assistant") return 256;

  const content = contentOf(message);
  if (typeof content === "string") return content.length;
  if (!Array.isArray(content)) return 0;

  const blocks: unknown[] = content;
  let chars = 0;
  for (const block of blocks) {
    if (typeof block === "string") {
      chars += block.length;
    } else if (isContentBlock(block)) {
      chars += role === "user" ? estimateUserBlockChars(block) : estimateAssistantBlockChars(block);
    }
  }
  return chars;
}

/**
 * Estimate total character count for messages.
 */
function estimateContextChars(messages: AgentMessage[]): number {
  return messages.reduce((sum, m) => sum + estimateMessageChars(m), 0);
}

/**
 * Find the index where we should stop protecting assistant messages.
 *
 * @returns The index of the `keepLastAssistants`-th assistant message counted
 *   from the end; `messages.length` when `keepLastAssistants <= 0`; null if not
 *   enough assistant messages exist.
 */
function findAssistantCutoffIndex(
  messages: AgentMessage[],
  keepLastAssistants: number,
): number | null {
  if (keepLastAssistants <= 0) return messages.length;

  let remaining = keepLastAssistants;
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i]?.role !== "assistant") continue;
    remaining--;
    if (remaining === 0) return i;
  }
  return null;
}

/**
 * Check if a user message is a "real" user message (not just tool results).
 * Tool results are sent as user messages but they're not real user input.
 */
function isRealUserMessage(message: AgentMessage): boolean {
  if (message.role !== "user") return false;

  const content = contentOf(message);
  // String content, and any non-array shape, counts as real user input.
  if (!Array.isArray(content)) return true;

  // Array content is real as soon as one block is not a tool_result.
  const blocks: unknown[] = content;
  return blocks.some(
    (block) =>
      typeof block === "string" || (isContentBlock(block) && block.type !== "tool_result"),
  );
}

/**
 * Find the index of the first real user message (not tool results).
 * This is used for bootstrap protection - we never prune before the first real user input.
 */
function findFirstUserIndex(messages: AgentMessage[]): number | null {
  const index = messages.findIndex((msg) => msg != null && isRealUserMessage(msg));
  return index === -1 ? null : index;
}

/**
 * Check if a tool should be pruned based on settings.
 *
 * The deny list wins over the allow list; a non-empty allow list restricts
 * pruning to the tools it names; with neither list every tool is prunable.
 */
function isToolPrunable(toolName: string, settings: ToolResultPruningSettings): boolean {
  const { tools } = settings;
  if (!tools) return true;
  if (tools.deny?.includes(toolName)) return false;
  if (tools.allow && tools.allow.length > 0) return tools.allow.includes(toolName);
  return true;
}

/**
 * Take first N characters from text.
 */
function takeHead(text: string, maxChars: number): string {
  if (maxChars <= 0) return "";
  return text.length <= maxChars ? text : text.slice(0, maxChars);
}

/**
 * Take last N characters from text.
 */
function takeTail(text: string, maxChars: number): string {
  if (maxChars <= 0) return "";
  return text.length <= maxChars ? text : text.slice(text.length - maxChars);
}

/**
 * Soft trim a tool result text: keep the head and tail and append a note.
 *
 * @returns The trimmed text and the chars saved, or null when the text is within
 *   `softTrim.maxChars`, when head + tail already cover it, or when the trimmed
 *   form (separator and note included) would not be shorter than the original.
 */
function softTrimText(
  text: string,
  settings: ToolResultPruningSettings,
): { trimmed: string; saved: number } | null {
  const { maxChars, headChars, tailChars } = settings.softTrim;

  if (text.length <= maxChars) return null;
  if (headChars + tailChars >= text.length) return null;

  const head = takeHead(text, headChars);
  const tail = takeTail(text, tailChars);
  const note = `\n\n[Tool result trimmed: kept first ${headChars} chars and last ${tailChars} chars of ${text.length} chars.]`;
  const trimmed = `${head}\n...\n${tail}${note}`;

  // The separator and note cost chars too; never "trim" into something longer.
  const saved = text.length - trimmed.length;
  if (saved <= 0) return null;

  return { trimmed, saved };
}

/**
 * Process a user message containing tool results.
 * Returns modified message if any tool results were trimmed/cleared.
 *
 * Blocks that are not `tool_result`, belong to a non-prunable tool, or contain
 * an image are passed through untouched.
 *
 * @param mode - "soft" trims large results to head + tail; "hard" replaces the
 *   result with `hardClear.placeholder`.
 * @returns The original message object when nothing changed, otherwise a
 *   shallow copy with the rewritten content, plus the chars saved.
 */
function processUserMessageToolResults(
  message: AgentMessage,
  settings: ToolResultPruningSettings,
  mode: "soft" | "hard",
): { message: AgentMessage; changed: boolean; charsSaved: number } {
  const content = contentOf(message);
  if (!Array.isArray(content)) {
    return { message, changed: false, charsSaved: 0 };
  }

  const blocks: unknown[] = content;
  const newContent: unknown[] = [];
  let changed = false;
  let charsSaved = 0;

  for (const block of blocks) {
    if (!isContentBlock(block) || block.type !== "tool_result") {
      newContent.push(block);
      continue;
    }

    const toolName = typeof block.name === "string" ? block.name : "unknown";
    if (!isToolPrunable(toolName, settings) || hasImageContent(block.content)) {
      newContent.push(block);
      continue;
    }

    const originalText = extractToolResultText(block.content);

    if (mode === "soft") {
      const result = softTrimText(originalText, settings);
      if (result === null) {
        newContent.push(block);
        continue;
      }
      newContent.push({ ...block, content: [{ type: "text", text: result.trimmed }] });
      changed = true;
      charsSaved += result.saved;
      continue;
    }

    // Hard clear. Skip results that are already the placeholder or are no
    // longer than it: rewriting them saves nothing, and reporting a change
    // would make every later pass look like it pruned something.
    const { placeholder } = settings.hardClear;
    const saved = originalText.length - placeholder.length;
    if (saved <= 0) {
      newContent.push(block);
      continue;
    }
    newContent.push({ ...block, content: [{ type: "text", text: placeholder }] });
    changed = true;
    charsSaved += saved;
  }

  if (!changed) {
    return { message, changed: false, charsSaved: 0 };
  }

  return {
    message: { ...message, content: newContent } as AgentMessage,
    changed: true,
    charsSaved,
  };
}

// ─── Main Functions ──────────────────────────────────────────────────────────

// Prune tool results in messages to reduce context window usage.
//
// Two-phase approach:
//   1. Soft Trim (at softTrimRatio): Keep head + tail of large tool results
//   2.
Hard Clear (at hardClearRatio): Replace old tool results with placeholder + * + * Protections: + * - Never prunes before first user message (protects bootstrap/identity reads) + * - Protects last N assistant messages and their corresponding tool results + * - Skips image-containing tool results + * - Respects tool allow/deny lists + */ +export function pruneToolResults(params: { + messages: AgentMessage[]; + contextWindowTokens: number; + settings?: Partial; +}): ToolResultPruningResult { + const { messages, contextWindowTokens } = params; + const settings: ToolResultPruningSettings = { + ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS, + ...params.settings, + softTrim: { + ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim, + ...params.settings?.softTrim, + }, + hardClear: { + ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.hardClear, + ...params.settings?.hardClear, + }, + }; + + const charWindow = contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE; + if (charWindow <= 0) { + return { messages, changed: false, softTrimmed: 0, hardCleared: 0, charsSaved: 0 }; + } + + // Find cutoff index for protected assistant messages + const cutoffIndex = findAssistantCutoffIndex(messages, settings.keepLastAssistants); + if (cutoffIndex === null) { + return { messages, changed: false, softTrimmed: 0, hardCleared: 0, charsSaved: 0 }; + } + + // Never prune before first user message (bootstrap protection) + const firstUserIndex = findFirstUserIndex(messages); + const pruneStartIndex = firstUserIndex === null ? 
messages.length : firstUserIndex; + + // Calculate current utilization + let totalChars = estimateContextChars(messages); + let ratio = totalChars / charWindow; + + // No pruning needed + if (ratio < settings.softTrimRatio) { + return { messages, changed: false, softTrimmed: 0, hardCleared: 0, charsSaved: 0 }; + } + + let result = messages.slice(); + let changed = false; + let softTrimmed = 0; + let hardCleared = 0; + let charsSaved = 0; + + // Track which messages have prunable tool results + const prunableIndexes: number[] = []; + + // Phase 1: Soft Trim + for (let i = pruneStartIndex; i < cutoffIndex; i++) { + const msg = result[i]; + if (!msg || msg.role !== "user") continue; + + const msgAny = msg as any; + if (!Array.isArray(msgAny.content)) continue; + + // Check if this message has tool results + const hasToolResult = msgAny.content.some( + (b: any) => b && typeof b === "object" && b.type === "tool_result", + ); + if (!hasToolResult) continue; + + prunableIndexes.push(i); + + const processed = processUserMessageToolResults(msg, settings, "soft"); + if (processed.changed) { + result[i] = processed.message; + changed = true; + softTrimmed++; + charsSaved += processed.charsSaved; + totalChars -= processed.charsSaved; + } + } + + // Recalculate ratio after soft trim + ratio = totalChars / charWindow; + + // Phase 2: Hard Clear (if needed) + if (ratio >= settings.hardClearRatio && settings.hardClear.enabled) { + // Check if we have enough prunable content to make hard clear worthwhile + let prunableChars = 0; + for (const i of prunableIndexes) { + prunableChars += estimateMessageChars(result[i]!); + } + + if (prunableChars >= settings.minPrunableToolChars) { + for (const i of prunableIndexes) { + if (ratio < settings.hardClearRatio) break; + + const msg = result[i]!; + const beforeChars = estimateMessageChars(msg); + + const processed = processUserMessageToolResults(msg, settings, "hard"); + if (processed.changed) { + result[i] = processed.message; + changed = 
true; + hardCleared++; + charsSaved += processed.charsSaved; + totalChars -= processed.charsSaved; + ratio = totalChars / charWindow; + } + } + } + } + + return { + messages: result, + changed, + softTrimmed, + hardCleared, + charsSaved, + }; +} diff --git a/src/agent/session/compaction.ts b/src/agent/session/compaction.ts index 1ce60ae1..18aadb3a 100644 --- a/src/agent/session/compaction.ts +++ b/src/agent/session/compaction.ts @@ -19,7 +19,8 @@ export type CompactionResult = { tokensKept?: number | undefined; /** Summary generated in summary mode */ summary?: string | undefined; - reason: "count" | "tokens" | "summary"; + /** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */ + reason: "count" | "tokens" | "summary" | "pruning"; }; /** diff --git a/src/agent/session/session-manager.ts b/src/agent/session/session-manager.ts index fd27505a..bd462a73 100644 --- a/src/agent/session/session-manager.ts +++ b/src/agent/session/session-manager.ts @@ -6,6 +6,10 @@ import { compactMessages, compactMessagesAsync } from "./compaction.js"; import { credentialManager } from "../credentials.js"; import { repairSessionFileIfNeeded, type RepairReport } from "./session-file-repair.js"; import { sanitizeToolCallInputs, sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; +import { + pruneToolResults, + type ToolResultPruningSettings, +} from "../context-window/tool-result-pruning.js"; /** Get Kimi model for summarization (use a cheaper model than k2-thinking) */ function getSummaryModel(): Model { @@ -53,6 +57,12 @@ export type SessionManagerOptions = { apiKey?: string | undefined; /** Custom summary instructions */ customInstructions?: string | undefined; + + // Tool result pruning + /** Whether to enable tool result pruning before compaction (default: true in tokens/summary mode) */ + enableToolResultPruning?: boolean | undefined; + /** Tool result pruning settings */ + toolResultPruning?: Partial | undefined; }; export class 
SessionManager { @@ -73,6 +83,9 @@ export class SessionManager { private apiKey: string | undefined; private readonly customInstructions: string | undefined; private previousSummary: string | undefined; + // Tool result pruning + private readonly enableToolResultPruning: boolean; + private readonly toolResultPruning: Partial | undefined; private queue: Promise = Promise.resolve(); private meta: SessionMeta | undefined; @@ -100,6 +113,12 @@ export class SessionManager { this.apiKey = options.apiKey; this.customInstructions = options.customInstructions; + // Tool result pruning (enabled by default in tokens/summary mode) + this.enableToolResultPruning = + options.enableToolResultPruning ?? + (this.compactionMode === "tokens" || this.compactionMode === "summary"); + this.toolResultPruning = options.toolResultPruning; + this.meta = this.loadMeta(); } @@ -194,6 +213,32 @@ export class SessionManager { } async maybeCompact(messages: AgentMessage[]) { + let workingMessages = messages; + let toolResultPruningApplied = false; + + // Phase 1: Tool result pruning (soft trim / hard clear) + // This reduces token usage without removing messages + if (this.enableToolResultPruning) { + const pruneResult = pruneToolResults({ + messages: workingMessages, + contextWindowTokens: this.contextWindowTokens, + settings: this.toolResultPruning, + }); + + if (pruneResult.changed) { + workingMessages = pruneResult.messages; + toolResultPruningApplied = true; + // Log pruning stats + if (pruneResult.softTrimmed > 0 || pruneResult.hardCleared > 0) { + console.error( + `[SessionManager] Tool result pruning: ${pruneResult.softTrimmed} soft-trimmed, ` + + `${pruneResult.hardCleared} hard-cleared, ~${Math.round(pruneResult.charsSaved / 1000)}k chars saved`, + ); + } + } + } + + // Phase 2: Message compaction (remove old messages if still needed) let result; if (this.compactionMode === "summary") { @@ -203,7 +248,7 @@ export class SessionManager { if (!apiKey) { // No API key available, downgrade 
to tokens mode - result = compactMessages(messages, { + result = compactMessages(workingMessages, { mode: "tokens", contextWindowTokens: this.contextWindowTokens, systemPrompt: this.systemPrompt, @@ -212,7 +257,7 @@ export class SessionManager { minKeepMessages: this.minKeepMessages, }); } else { - result = await compactMessagesAsync(messages, { + result = await compactMessagesAsync(workingMessages, { mode: "summary", model, apiKey, @@ -231,7 +276,7 @@ export class SessionManager { } } } else { - result = compactMessages(messages, { + result = compactMessages(workingMessages, { mode: this.compactionMode, // Count mode parameters maxMessages: this.maxMessages, @@ -245,7 +290,14 @@ export class SessionManager { }); } - if (!result) return null; + // If no message compaction needed but tool result pruning was applied, + // still return the pruned messages + if (!result) { + if (toolResultPruningApplied) { + return { kept: workingMessages, removedCount: 0, reason: "pruning" as const }; + } + return null; + } const entries: SessionEntry[] = []; if (this.meta) { From a7f1c56e0945e327c16c9e3770bf05800b938e6f Mon Sep 17 00:00:00 2001 From: Jiang Bohan Date: Thu, 5 Feb 2026 15:55:30 +0800 Subject: [PATCH 2/7] fix(profile): resolve exactOptionalPropertyTypes type error in updateStyle --- src/agent/profile/index.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/agent/profile/index.ts b/src/agent/profile/index.ts index 54e0b0f7..cf7423e0 100644 --- a/src/agent/profile/index.ts +++ b/src/agent/profile/index.ts @@ -292,10 +292,10 @@ export class ProfileManager { updateStyle(style: string): void { const profile = this.getOrCreateProfile(false); const currentConfig = profile.config ?? 
{}; - const newConfig: ProfileConfig = { - ...currentConfig, + // Use Object.assign to avoid exactOptionalPropertyTypes issues with spread + const newConfig: ProfileConfig = Object.assign({}, currentConfig, { style: style as ProfileConfig["style"], - }; + }); profile.config = newConfig; this.profile = profile; writeProfileConfig(this.profileId, newConfig, { baseDir: this.baseDir }); From d2827ae9482bdaeab3194c41f42abbd4f9db3888 Mon Sep 17 00:00:00 2001 From: Jiang Bohan Date: Thu, 5 Feb 2026 16:01:10 +0800 Subject: [PATCH 3/7] fix(session): add pruning to SessionEntry reason type The CompactionResult type includes "pruning" as a reason but SessionEntry did not, causing type errors when writing compaction entries. Co-Authored-By: Claude Opus 4.5 --- src/agent/session/types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/agent/session/types.ts b/src/agent/session/types.ts index 72c810ab..2d311902 100644 --- a/src/agent/session/types.ts +++ b/src/agent/session/types.ts @@ -23,5 +23,5 @@ export type SessionEntry = tokensKept?: number | undefined; /** 摘要模式生成的摘要 */ summary?: string | undefined; - reason?: "count" | "tokens" | "summary" | undefined; + reason?: "count" | "tokens" | "summary" | "pruning" | undefined; }; From 17fc77e44dde898cd5470e835761df71a1e88219 Mon Sep 17 00:00:00 2001 From: Jiang Bohan Date: Thu, 5 Feb 2026 16:11:35 +0800 Subject: [PATCH 4/7] chore: trigger CI rebuild From 4e61155e5ee47c949353df33d6a297269d265b77 Mon Sep 17 00:00:00 2001 From: Jiang Bohan Date: Thu, 5 Feb 2026 16:16:33 +0800 Subject: [PATCH 5/7] chore(turbo): add src as global dependency for cache invalidation The desktop app imports from the root src directory, but turbo wasn't tracking those files for cache hash calculation. This caused CI builds to use stale hashes when types changed in the root src directory. 
Co-Authored-By: Claude Opus 4.5 --- turbo.json | 1 + 1 file changed, 1 insertion(+) diff --git a/turbo.json b/turbo.json index d91a416f..22c06f3b 100644 --- a/turbo.json +++ b/turbo.json @@ -1,5 +1,6 @@ { "$schema": "https://turbo.build/schema.json", + "globalDependencies": ["src/**"], "tasks": { "build": { "dependsOn": ["^build"], From 71e44bebc09037a46c57d2bb107b8ec63f111fec Mon Sep 17 00:00:00 2001 From: Jiang Bohan Date: Thu, 5 Feb 2026 16:20:40 +0800 Subject: [PATCH 6/7] fix(session): add explicit return type to maybeCompact method Adding explicit return type to help TypeScript resolve the type correctly across different build configurations. Co-Authored-By: Claude Opus 4.5 --- src/agent/session/session-manager.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/agent/session/session-manager.ts b/src/agent/session/session-manager.ts index bd462a73..2c8d78d4 100644 --- a/src/agent/session/session-manager.ts +++ b/src/agent/session/session-manager.ts @@ -2,7 +2,7 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { getModel, type Model } from "@mariozechner/pi-ai"; import type { SessionEntry, SessionMeta } from "./types.js"; import { appendEntry, readEntries, resolveSessionPath, writeEntries } from "./storage.js"; -import { compactMessages, compactMessagesAsync } from "./compaction.js"; +import { compactMessages, compactMessagesAsync, type CompactionResult } from "./compaction.js"; import { credentialManager } from "../credentials.js"; import { repairSessionFileIfNeeded, type RepairReport } from "./session-file-repair.js"; import { sanitizeToolCallInputs, sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; @@ -212,7 +212,7 @@ export class SessionManager { ); } - async maybeCompact(messages: AgentMessage[]) { + async maybeCompact(messages: AgentMessage[]): Promise { let workingMessages = messages; let toolResultPruningApplied = false; From d46d647cfb8a359ae99df0e0b2b12bf24dd32f08 Mon Sep 17 
00:00:00 2001 From: Jiang Bohan Date: Thu, 5 Feb 2026 16:22:41 +0800 Subject: [PATCH 7/7] chore: trigger CI