diff --git a/src/agent/context-window/index.ts b/src/agent/context-window/index.ts index ec57f696..440d6b28 100644 --- a/src/agent/context-window/index.ts +++ b/src/agent/context-window/index.ts @@ -44,3 +44,13 @@ export { compactMessagesWithSummary, compactMessagesWithChunkedSummary, } from "./summarization.js"; + +// Tool result pruning +export type { + ToolResultPruningSettings, + ToolResultPruningResult, +} from "./tool-result-pruning.js"; +export { + DEFAULT_TOOL_RESULT_PRUNING_SETTINGS, + pruneToolResults, +} from "./tool-result-pruning.js"; diff --git a/src/agent/context-window/tool-result-pruning.test.ts b/src/agent/context-window/tool-result-pruning.test.ts new file mode 100644 index 00000000..d2677c2a --- /dev/null +++ b/src/agent/context-window/tool-result-pruning.test.ts @@ -0,0 +1,285 @@ +import { describe, it, expect } from "vitest"; +import type { AgentMessage } from "@mariozechner/pi-agent-core"; +import { pruneToolResults, DEFAULT_TOOL_RESULT_PRUNING_SETTINGS } from "./tool-result-pruning.js"; + +// Helper to create a user message with tool result +function createToolResultMessage( + toolName: string, + content: string, + toolUseId: string = "tool-123", +): AgentMessage { + return { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: toolUseId, + name: toolName, + content: [{ type: "text", text: content }], + }, + ], + } as unknown as AgentMessage; +} + +// Helper to create an assistant message +function createAssistantMessage(text: string): AgentMessage { + return { + role: "assistant", + content: [{ type: "text", text }], + } as unknown as AgentMessage; +} + +// Helper to create a user message +function createUserMessage(text: string): AgentMessage { + return { + role: "user", + content: text, + } as unknown as AgentMessage; +} + +describe("pruneToolResults", () => { + it("returns unchanged if utilization is below softTrimRatio", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Hi there!"), + 
createToolResultMessage("read", "Short content"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 200_000, // Very large window + }); + + expect(result.changed).toBe(false); + expect(result.messages).toBe(messages); + expect(result.softTrimmed).toBe(0); + expect(result.hardCleared).toBe(0); + }); + + it("soft trims large tool results", () => { + // Create a message with a large tool result (5000 chars) + const largeContent = "A".repeat(5000); + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Processing..."), + createToolResultMessage("read", largeContent), + createAssistantMessage("Done!"), + createAssistantMessage("Follow up"), + createAssistantMessage("Another one"), + createAssistantMessage("Protected message"), // This is protected (keepLastAssistants=3) + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 10_000, // Small window to trigger pruning + settings: { + softTrimRatio: 0.1, // Low threshold to ensure pruning + }, + }); + + expect(result.changed).toBe(true); + expect(result.softTrimmed).toBe(1); + + // Check that the trimmed message contains head + tail + const trimmedMsg = result.messages[2] as any; + const trimmedText = trimmedMsg.content[0].content[0].text; + expect(trimmedText).toContain("A".repeat(100)); // Should have some head content + expect(trimmedText).toContain("..."); // Truncation marker + expect(trimmedText).toContain("[Tool result trimmed:"); + }); + + it("hard clears when utilization exceeds hardClearRatio", () => { + // Create multiple messages with large tool results + const largeContent = "X".repeat(10000); + const messages = [ + createUserMessage("Start"), + createAssistantMessage("Processing 1"), + createToolResultMessage("read", largeContent, "tool-1"), + createAssistantMessage("Processing 2"), + createToolResultMessage("exec", largeContent, "tool-2"), + createAssistantMessage("Processing 3"), + createToolResultMessage("glob", largeContent, 
"tool-3"), + createAssistantMessage("Done 1"), // Protected + createAssistantMessage("Done 2"), // Protected + createAssistantMessage("Done 3"), // Protected + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, // Very small window + settings: { + softTrimRatio: 0.1, + hardClearRatio: 0.2, + minPrunableToolChars: 1000, // Lower threshold for test + hardClear: { + enabled: true, + placeholder: "[Cleared]", + }, + }, + }); + + expect(result.changed).toBe(true); + // Should have cleared at least some tool results + expect(result.hardCleared).toBeGreaterThan(0); + expect(result.charsSaved).toBeGreaterThan(0); + }); + + it("protects last N assistant messages", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("First"), + createToolResultMessage("read", "A".repeat(5000), "tool-1"), // Should be prunable + createAssistantMessage("Second"), // Protected (keepLastAssistants=3) + createToolResultMessage("read", "B".repeat(5000), "tool-2"), // In protected zone, should NOT be pruned + createAssistantMessage("Third"), // Protected + createAssistantMessage("Fourth"), // Protected + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + keepLastAssistants: 3, + }, + }); + + // The first tool result (before protected zone) may be pruned + // But the second one (after "Second" assistant which is in protected zone) should not be + if (result.changed) { + // Check that tool-2 result is NOT modified (it's in protected zone) + const tool2Msg = result.messages[4] as any; + const tool2Content = tool2Msg.content[0].content[0].text; + expect(tool2Content).toBe("B".repeat(5000)); // Unchanged + } + }); + + it("never prunes before first user message", () => { + const messages = [ + createAssistantMessage("Bootstrap read"), // Before first user message + createToolResultMessage("read", "A".repeat(5000), "tool-1"), // Should NOT be pruned + 
createUserMessage("Hello"), // First user message + createAssistantMessage("Response"), + createToolResultMessage("read", "B".repeat(5000), "tool-2"), // Can be pruned + createAssistantMessage("Done 1"), + createAssistantMessage("Done 2"), + createAssistantMessage("Done 3"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + }, + }); + + // The first tool result (before first user message) should NOT be modified + const tool1Msg = result.messages[1] as any; + const tool1Content = tool1Msg.content[0].content[0].text; + expect(tool1Content).toBe("A".repeat(5000)); // Unchanged - bootstrap protection + }); + + it("respects tool deny list", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Processing"), + createToolResultMessage("read", "A".repeat(5000), "tool-1"), + createAssistantMessage("Done 1"), + createAssistantMessage("Done 2"), + createAssistantMessage("Done 3"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + tools: { + deny: ["read"], // Don't prune read tool results + }, + }, + }); + + // read tool should not be pruned + expect(result.changed).toBe(false); + }); + + it("respects tool allow list", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Processing"), + createToolResultMessage("read", "A".repeat(5000), "tool-1"), + createToolResultMessage("exec", "B".repeat(5000), "tool-2"), + createAssistantMessage("Done 1"), + createAssistantMessage("Done 2"), + createAssistantMessage("Done 3"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + tools: { + allow: ["exec"], // Only prune exec tool results + }, + }, + }); + + if (result.changed) { + // read tool should not be pruned + const tool1Msg = result.messages[2] as any; + const tool1Content = 
tool1Msg.content[0].content[0].text; + expect(tool1Content).toBe("A".repeat(5000)); // Unchanged + } + }); + + it("skips tool results with images", () => { + const messages = [ + createUserMessage("Hello"), + createAssistantMessage("Processing"), + { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "tool-1", + name: "screenshot", + content: [ + { type: "image", source: { type: "base64", data: "abc123" } }, + { type: "text", text: "A".repeat(5000) }, + ], + }, + ], + } as unknown as AgentMessage, + createAssistantMessage("Done 1"), + createAssistantMessage("Done 2"), + createAssistantMessage("Done 3"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.1, + }, + }); + + // Image-containing tool result should not be pruned + expect(result.softTrimmed).toBe(0); + expect(result.hardCleared).toBe(0); + }); +}); + +describe("DEFAULT_TOOL_RESULT_PRUNING_SETTINGS", () => { + it("has expected default values", () => { + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrimRatio).toBe(0.3); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.hardClearRatio).toBe(0.5); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.keepLastAssistants).toBe(3); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim.maxChars).toBe(4000); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim.headChars).toBe(1500); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim.tailChars).toBe(1500); + expect(DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.hardClear.enabled).toBe(true); + }); +}); diff --git a/src/agent/context-window/tool-result-pruning.ts b/src/agent/context-window/tool-result-pruning.ts new file mode 100644 index 00000000..ef7ac5f1 --- /dev/null +++ b/src/agent/context-window/tool-result-pruning.ts @@ -0,0 +1,510 @@ +/** + * Tool Result Pruning + * + * Smart pruning of tool results to reduce context window usage while preserving + * useful information. Implements two-phase pruning: + * + * 1. 
Soft Trim: Keep head + tail of large tool results + * 2. Hard Clear: Replace old tool results with placeholder + * + * Based on OpenClaw's microcompact-style context pruning. + */ + +import type { AgentMessage } from "@mariozechner/pi-agent-core"; + +// ─── Types ─────────────────────────────────────────────────────────────────── + +export type ToolResultPruningSettings = { + /** Utilization ratio to start soft trimming (default: 0.3) */ + softTrimRatio: number; + /** Utilization ratio to start hard clearing (default: 0.5) */ + hardClearRatio: number; + /** Minimum prunable tool result chars to consider hard clear (default: 50000) */ + minPrunableToolChars: number; + /** Number of recent assistant messages to protect from pruning (default: 3) */ + keepLastAssistants: number; + /** Soft trim settings */ + softTrim: { + /** Max chars before triggering soft trim (default: 4000) */ + maxChars: number; + /** Chars to keep from start (default: 1500) */ + headChars: number; + /** Chars to keep from end (default: 1500) */ + tailChars: number; + }; + /** Hard clear settings */ + hardClear: { + /** Whether hard clear is enabled (default: true) */ + enabled: boolean; + /** Placeholder text for cleared results */ + placeholder: string; + }; + /** Tool names to allow/deny pruning */ + tools?: { + allow?: string[]; + deny?: string[]; + }; +}; + +export const DEFAULT_TOOL_RESULT_PRUNING_SETTINGS: ToolResultPruningSettings = { + softTrimRatio: 0.3, + hardClearRatio: 0.5, + minPrunableToolChars: 50_000, + keepLastAssistants: 3, + softTrim: { + maxChars: 4_000, + headChars: 1_500, + tailChars: 1_500, + }, + hardClear: { + enabled: true, + placeholder: "[Tool result cleared to save context space]", + }, +}; + +export type ToolResultPruningResult = { + /** Pruned messages */ + messages: AgentMessage[]; + /** Whether any changes were made */ + changed: boolean; + /** Number of soft-trimmed results */ + softTrimmed: number; + /** Number of hard-cleared results */ + hardCleared: number; 
+ /** Estimated chars saved */ + charsSaved: number; +}; + +// ─── Constants ─────────────────────────────────────────────────────────────── + +const CHARS_PER_TOKEN_ESTIMATE = 4; +const IMAGE_CHAR_ESTIMATE = 8_000; + +// ─── Helper Functions ──────────────────────────────────────────────────────── + +/** + * Extract text content from a tool result content block. + */ +function extractToolResultText(content: unknown): string { + if (typeof content === "string") return content; + if (Array.isArray(content)) { + const parts: string[] = []; + for (const block of content) { + if (typeof block === "string") { + parts.push(block); + } else if (block && typeof block === "object") { + if ("text" in block && typeof block.text === "string") { + parts.push(block.text); + } + } + } + return parts.join("\n"); + } + return ""; +} + +/** + * Check if content contains images. + */ +function hasImageContent(content: unknown): boolean { + if (!Array.isArray(content)) return false; + for (const block of content) { + if (block && typeof block === "object" && "type" in block) { + if (block.type === "image") return true; + } + } + return false; +} + +/** + * Estimate character count for a message. 
/**
 * Estimate character count for a message.
 *
 * Only `user` and `assistant` messages are inspected; any other role is
 * charged a flat 256 characters. String content counts its own length, content
 * that is neither a string nor an array counts as 0, and array content is
 * summed block by block:
 *
 * - string blocks and `text` blocks: length of the text
 * - `tool_result` blocks (user): length of the extracted text plus
 *   `IMAGE_CHAR_ESTIMATE` for every image nested in the result, so an image
 *   costs the same whether it is a top-level block or part of a tool result
 * - `image` blocks (user): `IMAGE_CHAR_ESTIMATE`
 * - `thinking` blocks (assistant): length of the thinking text
 * - `toolCall` / `tool_use` blocks (assistant): length of the JSON-serialized
 *   arguments, or 128 when they cannot be serialized
 *
 * @param message - Message to measure.
 * @returns Estimated size of the message in characters.
 */
function estimateMessageChars(message: AgentMessage): number {
  const msgAny = message as any;
  const role: unknown = msgAny.role;

  // Roles this module does not understand get a fixed charge.
  if (role !== "user" && role !== "assistant") return 256;

  const content: unknown = msgAny.content;
  if (typeof content === "string") return content.length;
  if (!Array.isArray(content)) return 0;

  const estimateBlock = role === "user" ? estimateUserBlockChars : estimateAssistantBlockChars;

  let total = 0;
  for (const block of content) {
    if (typeof block === "string") {
      total += block.length;
    } else if (block && typeof block === "object") {
      total += estimateBlock(block);
    }
  }
  return total;
}

/**
 * Estimate the size of one object block of a user message.
 * Unrecognized block types contribute 0.
 */
function estimateUserBlockChars(block: any): number {
  switch (block.type) {
    case "text":
      return typeof block.text === "string" ? block.text.length : 0;
    case "tool_result":
      // Text alone under-reports results that carry screenshots or other
      // images, so nested images are charged like top-level image blocks.
      return (
        extractToolResultText(block.content).length +
        countNestedImages(block.content) * IMAGE_CHAR_ESTIMATE
      );
    case "image":
      return IMAGE_CHAR_ESTIMATE;
    default:
      return 0;
  }
}

/**
 * Estimate the size of one object block of an assistant message.
 * Unrecognized block types contribute 0.
 */
function estimateAssistantBlockChars(block: any): number {
  if (block.type === "text" && typeof block.text === "string") {
    return block.text.length;
  }
  if (block.type === "thinking" && typeof block.thinking === "string") {
    return block.thinking.length;
  }
  if (block.type === "toolCall" || block.type === "tool_use") {
    try {
      return JSON.stringify(block.arguments ?? block.input ?? {}).length;
    } catch {
      // Arguments that cannot be serialized (e.g. circular) get a nominal size.
      return 128;
    }
  }
  return 0;
}

/**
 * Count the `image` blocks directly inside a tool result's content array.
 * Non-array content has no nested images.
 */
function countNestedImages(content: unknown): number {
  if (!Array.isArray(content)) return 0;

  let images = 0;
  for (const block of content) {
    if (block && typeof block === "object" && (block as any).type === "image") {
      images++;
    }
  }
  return images;
}

/**
 * Estimate total character count for messages.
 *
 * @param messages - Messages to measure.
 * @returns Sum of `estimateMessageChars` over all messages (0 for an empty list).
 */
function estimateContextChars(messages: AgentMessage[]): number {
  let total = 0;
  for (const message of messages) {
    total += estimateMessageChars(message);
  }
  return total;
}
/**
 * Find the index where we should stop protecting assistant messages.
 *
 * Walks backwards from the end of the list counting assistant messages and
 * returns the index of the `keepLastAssistants`-th one from the end.
 *
 * @param messages - Conversation in chronological order.
 * @param keepLastAssistants - How many trailing assistant messages to protect.
 *   Zero or a negative value protects nothing; a fractional value is rounded
 *   up (2.5 protects three) instead of never matching.
 * @returns `messages.length` when nothing is protected, the index of the
 *   oldest protected assistant message otherwise, or `null` if not enough
 *   assistant messages exist.
 */
function findAssistantCutoffIndex(
  messages: AgentMessage[],
  keepLastAssistants: number,
): number | null {
  if (keepLastAssistants <= 0) return messages.length;

  let remaining = keepLastAssistants;
  for (let i = messages.length - 1; i >= 0; i--) {
    if (messages[i]?.role !== "assistant") continue;
    remaining -= 1;
    // `<=` rather than `===`: a fractional count would otherwise step over
    // zero, fall through to `null`, and silently disable pruning.
    if (remaining <= 0) return i;
  }

  return null;
}

/**
 * Check if a user message is a "real" user message (not just tool results).
 * Tool results are sent as user messages but they're not real user input.
 *
 * @param message - Message to classify.
 * @returns `false` for non-user roles and for user messages whose content
 *   array holds no string block and no object block other than `tool_result`
 *   (this includes an empty array); `true` for string content, for arrays
 *   with at least one such block, and for content of any other shape.
 */
function isRealUserMessage(message: AgentMessage): boolean {
  if (message.role !== "user") return false;

  const content: unknown = (message as any).content;

  // Plain string content is always genuine user input.
  if (typeof content === "string") return true;

  // Content that is neither a string nor an array is treated as user input.
  if (!Array.isArray(content)) return true;

  // One block that is not a tool result is enough.
  return content.some(
    (block: any) =>
      typeof block === "string" ||
      (Boolean(block) && typeof block === "object" && block.type !== "tool_result"),
  );
}

/**
 * Find the index of the first real user message (not tool results).
 * This is used for bootstrap protection - we never prune before the first real user input.
 *
 * @param messages - Conversation in chronological order.
 * @returns Index of the first message accepted by `isRealUserMessage`, or
 *   `null` when there is none.
 */
function findFirstUserIndex(messages: AgentMessage[]): number | null {
  const index = messages.findIndex((msg) => Boolean(msg) && isRealUserMessage(msg));
  return index === -1 ? null : index;
}
/**
 * Check if a tool should be pruned based on settings.
 *
 * Names are compared by exact string match. The deny list is consulted first,
 * so a tool listed in both `deny` and `allow` is not pruned. An absent or
 * empty allow list places no restriction.
 *
 * @param toolName - Name taken from the tool result block.
 * @param settings - Pruning settings; only `settings.tools` is read.
 * @returns `true` when results of this tool may be trimmed or cleared.
 */
function isToolPrunable(toolName: string, settings: ToolResultPruningSettings): boolean {
  const { tools } = settings;
  if (!tools) return true;

  // Deny wins over everything else.
  if (tools.deny?.includes(toolName)) return false;

  // A non-empty allow list turns pruning into opt-in.
  const allow = tools.allow;
  if (allow && allow.length > 0) return allow.includes(toolName);

  return true;
}

/**
 * Take first N characters from text.
 *
 * @returns `""` when `maxChars <= 0`, the whole text when it already fits,
 *   otherwise the leading `maxChars` characters.
 */
function takeHead(text: string, maxChars: number): string {
  if (maxChars <= 0) return "";
  return text.length <= maxChars ? text : text.slice(0, maxChars);
}

/**
 * Take last N characters from text.
 *
 * @returns `""` when `maxChars <= 0`, the whole text when it already fits,
 *   otherwise the trailing `maxChars` characters.
 */
function takeTail(text: string, maxChars: number): string {
  if (maxChars <= 0) return "";
  return text.length <= maxChars ? text : text.slice(text.length - maxChars);
}

/**
 * Soft trim a tool result text.
 *
 * Keeps `softTrim.headChars` from the start and `softTrim.tailChars` from the
 * end, joins them with a `...` line, and appends a note stating how much of
 * the original was kept.
 *
 * @param text - Full tool result text.
 * @param settings - Pruning settings; only `settings.softTrim` is read.
 * @returns The trimmed text and the number of characters saved, or `null`
 *   when the text should be left alone: it is not longer than
 *   `softTrim.maxChars`, head + tail already cover it, or the trimmed form
 *   (separator and note included) would not be shorter than the original.
 */
function softTrimText(
  text: string,
  settings: ToolResultPruningSettings,
): { trimmed: string; saved: number } | null {
  const { maxChars, headChars, tailChars } = settings.softTrim;

  if (text.length <= maxChars) return null;
  if (headChars + tailChars >= text.length) return null;

  const head = takeHead(text, headChars);
  const tail = takeTail(text, tailChars);
  const note = `\n\n[Tool result trimmed: kept first ${headChars} chars and last ${tailChars} chars of ${text.length} chars.]`;
  const trimmed = `${head}\n...\n${tail}${note}`;

  // With small limits the separator and note can outweigh what was cut.
  // Trimming would then grow the context and report negative savings.
  const saved = text.length - trimmed.length;
  if (saved <= 0) return null;

  return { trimmed, saved };
}
/**
 * Process a user message containing tool results.
 * Returns modified message if any tool results were trimmed/cleared.
 *
 * Every `tool_result` block in the message's content array is considered in
 * turn. A block is left exactly as it is when its tool is not prunable under
 * `settings.tools`, when its content contains an image, or when pruning it
 * would not make it smaller. All other blocks, and blocks of any other type,
 * pass through untouched. The input message is never mutated.
 *
 * @param message - Message to process; non-array content is returned unchanged.
 * @param settings - Fully resolved pruning settings.
 * @param mode - `"soft"` keeps head and tail of long results; `"hard"`
 *   replaces the result text with `settings.hardClear.placeholder`.
 * @returns The original message with `changed: false` when nothing was
 *   pruned; otherwise a shallow copy with the rewritten content and the
 *   number of characters saved (always positive).
 */
function processUserMessageToolResults(
  message: AgentMessage,
  settings: ToolResultPruningSettings,
  mode: "soft" | "hard",
): { message: AgentMessage; changed: boolean; charsSaved: number } {
  const content: unknown = (message as any).content;

  if (!Array.isArray(content)) {
    return { message, changed: false, charsSaved: 0 };
  }

  let changed = false;
  let charsSaved = 0;

  const newContent = content.map((block: any) => {
    if (!block || typeof block !== "object" || block.type !== "tool_result") {
      return block;
    }

    const toolName = block.name ?? "unknown";

    // Skip non-prunable tools
    if (!isToolPrunable(toolName, settings)) return block;

    // Skip image-containing tool results
    if (hasImageContent(block.content)) return block;

    const originalText = extractToolResultText(block.content);

    let replacement: string;
    let saved: number;

    if (mode === "soft") {
      const result = softTrimText(originalText, settings);
      // A trim that does not shrink the text is not worth applying.
      if (!result || result.saved <= 0) return block;
      replacement = result.trimmed;
      saved = result.saved;
    } else {
      const placeholder = settings.hardClear.placeholder;
      // Results that are already cleared, or no longer than the placeholder,
      // have nothing to give back. Rewriting them would report a change on
      // every pass and could even make the context larger.
      if (originalText.length <= placeholder.length) return block;
      replacement = placeholder;
      saved = originalText.length - placeholder.length;
    }

    changed = true;
    charsSaved += saved;
    return { ...block, content: [{ type: "text", text: replacement }] };
  });

  if (!changed) {
    return { message, changed: false, charsSaved: 0 };
  }

  return {
    message: { ...message, content: newContent } as AgentMessage,
    changed: true,
    charsSaved,
  };
}

// ─── Main Functions ──────────────────────────────────────────────────────────

/**
 * Prune tool results in messages to reduce context window usage.
 *
 * Two-phase approach:
 * 1. Soft Trim (at softTrimRatio): Keep head + tail of large tool results
 * 2.
Hard Clear (at hardClearRatio): Replace old tool results with placeholder + * + * Protections: + * - Never prunes before first user message (protects bootstrap/identity reads) + * - Protects last N assistant messages and their corresponding tool results + * - Skips image-containing tool results + * - Respects tool allow/deny lists + */ +export function pruneToolResults(params: { + messages: AgentMessage[]; + contextWindowTokens: number; + settings?: Partial; +}): ToolResultPruningResult { + const { messages, contextWindowTokens } = params; + const settings: ToolResultPruningSettings = { + ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS, + ...params.settings, + softTrim: { + ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.softTrim, + ...params.settings?.softTrim, + }, + hardClear: { + ...DEFAULT_TOOL_RESULT_PRUNING_SETTINGS.hardClear, + ...params.settings?.hardClear, + }, + }; + + const charWindow = contextWindowTokens * CHARS_PER_TOKEN_ESTIMATE; + if (charWindow <= 0) { + return { messages, changed: false, softTrimmed: 0, hardCleared: 0, charsSaved: 0 }; + } + + // Find cutoff index for protected assistant messages + const cutoffIndex = findAssistantCutoffIndex(messages, settings.keepLastAssistants); + if (cutoffIndex === null) { + return { messages, changed: false, softTrimmed: 0, hardCleared: 0, charsSaved: 0 }; + } + + // Never prune before first user message (bootstrap protection) + const firstUserIndex = findFirstUserIndex(messages); + const pruneStartIndex = firstUserIndex === null ? 
messages.length : firstUserIndex; + + // Calculate current utilization + let totalChars = estimateContextChars(messages); + let ratio = totalChars / charWindow; + + // No pruning needed + if (ratio < settings.softTrimRatio) { + return { messages, changed: false, softTrimmed: 0, hardCleared: 0, charsSaved: 0 }; + } + + let result = messages.slice(); + let changed = false; + let softTrimmed = 0; + let hardCleared = 0; + let charsSaved = 0; + + // Track which messages have prunable tool results + const prunableIndexes: number[] = []; + + // Phase 1: Soft Trim + for (let i = pruneStartIndex; i < cutoffIndex; i++) { + const msg = result[i]; + if (!msg || msg.role !== "user") continue; + + const msgAny = msg as any; + if (!Array.isArray(msgAny.content)) continue; + + // Check if this message has tool results + const hasToolResult = msgAny.content.some( + (b: any) => b && typeof b === "object" && b.type === "tool_result", + ); + if (!hasToolResult) continue; + + prunableIndexes.push(i); + + const processed = processUserMessageToolResults(msg, settings, "soft"); + if (processed.changed) { + result[i] = processed.message; + changed = true; + softTrimmed++; + charsSaved += processed.charsSaved; + totalChars -= processed.charsSaved; + } + } + + // Recalculate ratio after soft trim + ratio = totalChars / charWindow; + + // Phase 2: Hard Clear (if needed) + if (ratio >= settings.hardClearRatio && settings.hardClear.enabled) { + // Check if we have enough prunable content to make hard clear worthwhile + let prunableChars = 0; + for (const i of prunableIndexes) { + prunableChars += estimateMessageChars(result[i]!); + } + + if (prunableChars >= settings.minPrunableToolChars) { + for (const i of prunableIndexes) { + if (ratio < settings.hardClearRatio) break; + + const msg = result[i]!; + const beforeChars = estimateMessageChars(msg); + + const processed = processUserMessageToolResults(msg, settings, "hard"); + if (processed.changed) { + result[i] = processed.message; + changed = 
true; + hardCleared++; + charsSaved += processed.charsSaved; + totalChars -= processed.charsSaved; + ratio = totalChars / charWindow; + } + } + } + } + + return { + messages: result, + changed, + softTrimmed, + hardCleared, + charsSaved, + }; +} diff --git a/src/agent/events.ts b/src/agent/events.ts index 6177b912..8eb8b422 100644 --- a/src/agent/events.ts +++ b/src/agent/events.ts @@ -23,7 +23,7 @@ export type CompactionEndEvent = { kept: number; tokensRemoved?: number; tokensKept?: number; - reason: "count" | "tokens" | "summary"; + reason: "count" | "tokens" | "summary" | "pruning"; }; /** Union of all Multica-specific events */ diff --git a/src/agent/profile/index.ts b/src/agent/profile/index.ts index 54e0b0f7..cf7423e0 100644 --- a/src/agent/profile/index.ts +++ b/src/agent/profile/index.ts @@ -292,10 +292,10 @@ export class ProfileManager { updateStyle(style: string): void { const profile = this.getOrCreateProfile(false); const currentConfig = profile.config ?? {}; - const newConfig: ProfileConfig = { - ...currentConfig, + // Use Object.assign to avoid exactOptionalPropertyTypes issues with spread + const newConfig: ProfileConfig = Object.assign({}, currentConfig, { style: style as ProfileConfig["style"], - }; + }); profile.config = newConfig; this.profile = profile; writeProfileConfig(this.profileId, newConfig, { baseDir: this.baseDir }); diff --git a/src/agent/session/compaction.ts b/src/agent/session/compaction.ts index 1ce60ae1..18aadb3a 100644 --- a/src/agent/session/compaction.ts +++ b/src/agent/session/compaction.ts @@ -19,7 +19,8 @@ export type CompactionResult = { tokensKept?: number | undefined; /** Summary generated in summary mode */ summary?: string | undefined; - reason: "count" | "tokens" | "summary"; + /** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */ + reason: "count" | "tokens" | "summary" | "pruning"; }; /** diff --git a/src/agent/session/session-manager.ts b/src/agent/session/session-manager.ts 
index bb55721a..eece48b1 100644 --- a/src/agent/session/session-manager.ts +++ b/src/agent/session/session-manager.ts @@ -2,11 +2,15 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import { getModel, type Model } from "@mariozechner/pi-ai"; import type { SessionEntry, SessionMeta } from "./types.js"; import { appendEntry, readEntries, resolveSessionPath, writeEntries } from "./storage.js"; -import { compactMessages, compactMessagesAsync } from "./compaction.js"; +import { compactMessages, compactMessagesAsync, type CompactionResult } from "./compaction.js"; import { estimateTokenUsage, shouldCompact as shouldCompactTokens } from "../context-window/index.js"; import { credentialManager } from "../credentials.js"; import { repairSessionFileIfNeeded, type RepairReport } from "./session-file-repair.js"; import { sanitizeToolCallInputs, sanitizeToolUseResultPairing } from "./session-transcript-repair.js"; +import { + pruneToolResults, + type ToolResultPruningSettings, +} from "../context-window/tool-result-pruning.js"; /** Get Kimi model for summarization (use a cheaper model than k2-thinking) */ function getSummaryModel(): Model { @@ -54,6 +58,12 @@ export type SessionManagerOptions = { apiKey?: string | undefined; /** Custom summary instructions */ customInstructions?: string | undefined; + + // Tool result pruning + /** Whether to enable tool result pruning before compaction (default: true in tokens/summary mode) */ + enableToolResultPruning?: boolean | undefined; + /** Tool result pruning settings */ + toolResultPruning?: Partial | undefined; }; export class SessionManager { @@ -74,6 +84,9 @@ export class SessionManager { private apiKey: string | undefined; private readonly customInstructions: string | undefined; private previousSummary: string | undefined; + // Tool result pruning + private readonly enableToolResultPruning: boolean; + private readonly toolResultPruning: Partial | undefined; private queue: Promise = Promise.resolve(); private 
meta: SessionMeta | undefined; @@ -101,6 +114,12 @@ export class SessionManager { this.apiKey = options.apiKey; this.customInstructions = options.customInstructions; + // Tool result pruning (enabled by default in tokens/summary mode) + this.enableToolResultPruning = + options.enableToolResultPruning ?? + (this.compactionMode === "tokens" || this.compactionMode === "summary"); + this.toolResultPruning = options.toolResultPruning; + this.meta = this.loadMeta(); } @@ -209,7 +228,33 @@ export class SessionManager { return shouldCompactTokens(estimation); } - async maybeCompact(messages: AgentMessage[]) { + async maybeCompact(messages: AgentMessage[]): Promise { + let workingMessages = messages; + let toolResultPruningApplied = false; + + // Phase 1: Tool result pruning (soft trim / hard clear) + // This reduces token usage without removing messages + if (this.enableToolResultPruning) { + const pruneResult = pruneToolResults({ + messages: workingMessages, + contextWindowTokens: this.contextWindowTokens, + settings: this.toolResultPruning, + }); + + if (pruneResult.changed) { + workingMessages = pruneResult.messages; + toolResultPruningApplied = true; + // Log pruning stats + if (pruneResult.softTrimmed > 0 || pruneResult.hardCleared > 0) { + console.error( + `[SessionManager] Tool result pruning: ${pruneResult.softTrimmed} soft-trimmed, ` + + `${pruneResult.hardCleared} hard-cleared, ~${Math.round(pruneResult.charsSaved / 1000)}k chars saved`, + ); + } + } + } + + // Phase 2: Message compaction (remove old messages if still needed) let result; if (this.compactionMode === "summary") { @@ -219,7 +264,7 @@ export class SessionManager { if (!apiKey) { // No API key available, downgrade to tokens mode - result = compactMessages(messages, { + result = compactMessages(workingMessages, { mode: "tokens", contextWindowTokens: this.contextWindowTokens, systemPrompt: this.systemPrompt, @@ -228,7 +273,7 @@ export class SessionManager { minKeepMessages: this.minKeepMessages, }); } 
else { - result = await compactMessagesAsync(messages, { + result = await compactMessagesAsync(workingMessages, { mode: "summary", model, apiKey, @@ -247,7 +292,7 @@ export class SessionManager { } } } else { - result = compactMessages(messages, { + result = compactMessages(workingMessages, { mode: this.compactionMode, // Count mode parameters maxMessages: this.maxMessages, @@ -261,7 +306,14 @@ export class SessionManager { }); } - if (!result) return null; + // If no message compaction needed but tool result pruning was applied, + // still return the pruned messages + if (!result) { + if (toolResultPruningApplied) { + return { kept: workingMessages, removedCount: 0, reason: "pruning" as const }; + } + return null; + } const entries: SessionEntry[] = []; if (this.meta) { diff --git a/src/agent/session/types.ts b/src/agent/session/types.ts index 72c810ab..2d311902 100644 --- a/src/agent/session/types.ts +++ b/src/agent/session/types.ts @@ -23,5 +23,5 @@ export type SessionEntry = tokensKept?: number | undefined; /** 摘要模式生成的摘要 */ summary?: string | undefined; - reason?: "count" | "tokens" | "summary" | undefined; + reason?: "count" | "tokens" | "summary" | "pruning" | undefined; }; diff --git a/turbo.json b/turbo.json index d91a416f..22c06f3b 100644 --- a/turbo.json +++ b/turbo.json @@ -1,5 +1,6 @@ { "$schema": "https://turbo.build/schema.json", + "globalDependencies": ["src/**"], "tasks": { "build": { "dependsOn": ["^build"],