From 3f9a30423d6300a9d075ff78384be91f16e5000a Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Sun, 15 Feb 2026 23:02:18 +0800 Subject: [PATCH] feat(session): add artifact storage and pre-emptive tool result truncation Oversized tool results (>30% of context window) are now saved as artifacts before being truncated in the session. The LLM sees a truncated version with head+tail preservation and a marker pointing to the full artifact file, which it can re-read on demand. This prevents information loss during context window management. Co-Authored-By: Claude Opus 4.6 --- .../core/src/agent/context-window/index.ts | 11 + .../tool-result-truncation.test.ts | 320 ++++++++++++++++++ .../context-window/tool-result-truncation.ts | 217 ++++++++++++ .../session/artifact-integration.test.ts | 246 ++++++++++++++ .../src/agent/session/artifact-store.test.ts | 104 ++++++ .../core/src/agent/session/artifact-store.ts | 87 +++++ .../core/src/agent/session/session-manager.ts | 44 ++- 7 files changed, 1028 insertions(+), 1 deletion(-) create mode 100644 packages/core/src/agent/context-window/tool-result-truncation.test.ts create mode 100644 packages/core/src/agent/context-window/tool-result-truncation.ts create mode 100644 packages/core/src/agent/session/artifact-integration.test.ts create mode 100644 packages/core/src/agent/session/artifact-store.test.ts create mode 100644 packages/core/src/agent/session/artifact-store.ts diff --git a/packages/core/src/agent/context-window/index.ts b/packages/core/src/agent/context-window/index.ts index 62ebdf4b..381cb71d 100644 --- a/packages/core/src/agent/context-window/index.ts +++ b/packages/core/src/agent/context-window/index.ts @@ -67,3 +67,14 @@ export { DEFAULT_TOOL_RESULT_PRUNING_SETTINGS, pruneToolResults, } from "./tool-result-pruning.js"; + +// Pre-emptive tool result truncation +export type { + ToolResultTruncationSettings, + TruncatedToolResult, + TruncationResult, +} from "./tool-result-truncation.js"; +export { + 
DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS, + truncateOversizedToolResults, +} from "./tool-result-truncation.js"; diff --git a/packages/core/src/agent/context-window/tool-result-truncation.test.ts b/packages/core/src/agent/context-window/tool-result-truncation.test.ts new file mode 100644 index 00000000..996ca4bb --- /dev/null +++ b/packages/core/src/agent/context-window/tool-result-truncation.test.ts @@ -0,0 +1,320 @@ +import { describe, it, expect, beforeEach } from "vitest"; +import { + truncateOversizedToolResults, + DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS, +} from "./tool-result-truncation.js"; +import type { AgentMessage } from "@mariozechner/pi-agent-core"; + +describe("tool-result-truncation", () => { + describe("DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS", () => { + it("should have expected defaults", () => { + expect(DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS.maxResultContextShare).toBe(0.3); + expect(DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS.hardMaxResultChars).toBe(400_000); + expect(DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS.minKeepChars).toBe(2_000); + expect(DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS.headRatio).toBe(0.7); + expect(DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS.tailRatio).toBe(0.2); + }); + }); + + describe("truncateOversizedToolResults", () => { + // Helper to create artifact paths + const savedArtifacts: Array<{ toolCallId: string; content: string }> = []; + const mockSaveArtifact = (toolCallId: string, content: string) => { + savedArtifacts.push({ toolCallId, content }); + return `artifacts/${toolCallId}.txt`; + }; + + beforeEach(() => { + savedArtifacts.length = 0; + }); + + it("should not truncate assistant messages", () => { + const message = { + role: "assistant", + content: "x".repeat(500_000), + } as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100_000, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(false); + expect(result.artifacts).toHaveLength(0); + 
expect(savedArtifacts).toHaveLength(0); + }); + + it("should not truncate plain user text messages", () => { + const message = { + role: "user", + content: "Hello, world!", + } as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100_000, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(false); + }); + + it("should not truncate small tool results", () => { + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_001", + name: "read", + content: [{ type: "text", text: "small result" }], + }, + ], + } as unknown as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100_000, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(false); + expect(savedArtifacts).toHaveLength(0); + }); + + it("should truncate oversized tool results", () => { + // 100k tokens * 4 chars/token * 0.3 share = 120,000 char max + const largeContent = "x".repeat(200_000); + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_large", + name: "exec", + content: [{ type: "text", text: largeContent }], + }, + ], + } as unknown as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100_000, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(true); + expect(result.artifacts).toHaveLength(1); + expect(result.artifacts[0]!.toolCallId).toBe("toolu_large"); + expect(result.artifacts[0]!.toolName).toBe("exec"); + expect(result.artifacts[0]!.originalChars).toBe(200_000); + expect(result.artifacts[0]!.artifactRelPath).toBe("artifacts/toolu_large.txt"); + + // Verify artifact was saved + expect(savedArtifacts).toHaveLength(1); + expect(savedArtifacts[0]!.content).toBe(largeContent); + + // Verify the truncated message is smaller + const truncatedBlock = (result.message as any).content[0]; + const truncatedText = 
truncatedBlock.content[0].text; + expect(truncatedText.length).toBeLessThan(200_000); + expect(truncatedText).toContain("[Tool result truncated:"); + expect(truncatedText).toContain("artifacts/toolu_large.txt"); + expect(truncatedText).toContain("read tool"); + }); + + it("should skip image-containing tool results", () => { + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_img", + name: "browser", + content: [ + { type: "text", text: "x".repeat(500_000) }, + { type: "image", data: "base64data..." }, + ], + }, + ], + } as unknown as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100_000, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(false); + }); + + it("should respect hardMaxResultChars", () => { + // Set a very generous context share but strict hard max + const largeContent = "x".repeat(10_000); + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_hard", + name: "exec", + content: [{ type: "text", text: largeContent }], + }, + ], + } as unknown as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 1_000_000, // very large context + settings: { hardMaxResultChars: 5_000 }, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(true); + }); + + it("should handle multiple tool results in one message", () => { + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_small", + name: "read", + content: [{ type: "text", text: "small" }], + }, + { + type: "tool_result", + tool_use_id: "toolu_big", + name: "exec", + content: [{ type: "text", text: "y".repeat(200_000) }], + }, + { + type: "text", + text: "some user text", + }, + ], + } as unknown as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100_000, + saveArtifact: mockSaveArtifact, + }); + + 
expect(result.truncated).toBe(true); + expect(result.artifacts).toHaveLength(1); + expect(result.artifacts[0]!.toolCallId).toBe("toolu_big"); + + // Non-tool-result blocks preserved unchanged + const blocks = (result.message as any).content; + expect(blocks[0].content[0].text).toBe("small"); // small tool result unchanged + expect(blocks[2].text).toBe("some user text"); // text block unchanged + }); + + it("should handle string content in tool results", () => { + const largeContent = "z".repeat(200_000); + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_str", + name: "exec", + content: largeContent, + }, + ], + } as unknown as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100_000, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(true); + expect(savedArtifacts[0]!.content).toBe(largeContent); + }); + + it("should preserve head and tail of truncated content", () => { + // Create content with identifiable head and tail + const head = "HEAD_" + "a".repeat(50_000); + const middle = "b".repeat(100_000); + const tail = "c".repeat(50_000) + "_TAIL"; + const content = head + middle + tail; + + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_headtail", + name: "exec", + content: [{ type: "text", text: content }], + }, + ], + } as unknown as AgentMessage; + + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100_000, // 120k char max + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(true); + const truncatedText = (result.message as any).content[0].content[0].text; + expect(truncatedText).toContain("HEAD_"); + expect(truncatedText).toContain("_TAIL"); + }); + + it("should use custom settings", () => { + const content = "x".repeat(5_000); + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_custom", + name: 
"exec", + content: [{ type: "text", text: content }], + }, + ], + } as unknown as AgentMessage; + + // Small context with tight settings should trigger truncation + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 1_000, // 1000 tokens * 4 * 0.3 = 1200 char max + settings: { minKeepChars: 500 }, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(true); + }); + + it("should not truncate when content fits within minKeepChars", () => { + const content = "x".repeat(1_500); + const message = { + role: "user", + content: [ + { + type: "tool_result", + tool_use_id: "toolu_min", + name: "exec", + content: [{ type: "text", text: content }], + }, + ], + } as unknown as AgentMessage; + + // Even with very small context, minKeepChars (2000) > content (1500) + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: 100, + saveArtifact: mockSaveArtifact, + }); + + expect(result.truncated).toBe(false); + }); + }); +}); diff --git a/packages/core/src/agent/context-window/tool-result-truncation.ts b/packages/core/src/agent/context-window/tool-result-truncation.ts new file mode 100644 index 00000000..0cf40470 --- /dev/null +++ b/packages/core/src/agent/context-window/tool-result-truncation.ts @@ -0,0 +1,217 @@ +/** + * Pre-emptive Tool Result Truncation + * + * Truncates oversized tool results BEFORE they are persisted to the session file. + * The original full content is saved as an artifact so the agent can re-read it. + * + * This differs from tool-result-pruning.ts which operates AFTER persistence + * during post-turn compaction. Pre-emptive truncation ensures: + * 1. Session files don't grow unbounded + * 2. Truncation markers tell the LLM where to find the full data + * 3. 
The agent can use the read tool to access full artifacts when needed + */ + +import type { AgentMessage } from "@mariozechner/pi-agent-core"; + +// ─── Settings ───────────────────────────────────────────────────────────────── + +export type ToolResultTruncationSettings = { + /** Max fraction of context window a single tool result may occupy (default: 0.3) */ + maxResultContextShare: number; + /** Absolute hard cap in characters (default: 400_000) */ + hardMaxResultChars: number; + /** Minimum characters to always keep (default: 2_000) */ + minKeepChars: number; + /** Fraction of budget allocated to head (default: 0.7) */ + headRatio: number; + /** Fraction of budget allocated to tail (default: 0.2) */ + tailRatio: number; +}; + +export const DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS: ToolResultTruncationSettings = { + maxResultContextShare: 0.3, + hardMaxResultChars: 400_000, + minKeepChars: 2_000, + headRatio: 0.7, + tailRatio: 0.2, +}; + +const CHARS_PER_TOKEN = 4; + +// ─── Types ──────────────────────────────────────────────────────────────────── + +export type TruncatedToolResult = { + toolCallId: string; + toolName: string; + originalChars: number; + artifactRelPath: string; +}; + +export type TruncationResult = { + /** The (possibly modified) message */ + message: AgentMessage; + /** Whether any truncation was applied */ + truncated: boolean; + /** Info about each truncated tool result */ + artifacts: TruncatedToolResult[]; +}; + +// ─── Core Logic ─────────────────────────────────────────────────────────────── + +/** + * Compute the max chars allowed for a single tool result. + */ +function computeMaxChars( + contextWindowTokens: number, + settings: ToolResultTruncationSettings, +): number { + const contextShare = contextWindowTokens * CHARS_PER_TOKEN * settings.maxResultContextShare; + return Math.min(contextShare, settings.hardMaxResultChars); +} + +/** + * Extract text content from a tool result content field. 
+ */ +function extractText(content: unknown): string { + if (typeof content === "string") return content; + if (Array.isArray(content)) { + const parts: string[] = []; + for (const block of content) { + if (typeof block === "string") { + parts.push(block); + } else if (block && typeof block === "object" && "text" in block && typeof block.text === "string") { + parts.push(block.text); + } + } + return parts.join("\n"); + } + return ""; +} + +/** + * Truncate a text string, keeping head and tail portions. + */ +function truncateText( + text: string, + maxChars: number, + artifactRelPath: string, + settings: ToolResultTruncationSettings, +): string { + const keepChars = Math.max(settings.minKeepChars, maxChars); + if (text.length <= keepChars) return text; + + const headChars = Math.floor(keepChars * settings.headRatio); + const tailChars = Math.floor(keepChars * settings.tailRatio); + + // Try to break at a newline boundary for the head + let headEnd = headChars; + const lastNewline = text.lastIndexOf("\n", headChars); + if (lastNewline > headChars * 0.8) { + headEnd = lastNewline; + } + + const head = text.slice(0, headEnd); + const tail = text.slice(text.length - tailChars); + + const marker = + `\n\n[Tool result truncated: original ${text.length} chars. ` + + `Full result saved to ${artifactRelPath}. ` + + `Use the read tool to access the complete data if needed.]\n\n`; + + return head + marker + tail; +} + +/** + * Check if a content block contains images (skip those). + */ +function hasImages(content: unknown): boolean { + if (!Array.isArray(content)) return false; + return content.some( + (b: any) => b && typeof b === "object" && b.type === "image", + ); +} + +/** + * Process a single user message. Oversized text-only tool results are replaced by a head+tail + * excerpt; the full text is handed to the `saveArtifact` callback, which the caller implements to persist it.
+ * + * @param saveArtifact - called once per truncated result with the full original text; must return the relative artifact path + */ +export function truncateOversizedToolResults(params: { + message: AgentMessage; + contextWindowTokens: number; + settings?: Partial<ToolResultTruncationSettings>; + /** Called to save original content. Must return the relative artifact path. */ + saveArtifact: (toolCallId: string, content: string) => string; +}): TruncationResult { + const settings: ToolResultTruncationSettings = { + ...DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS, + ...params.settings, + }; + + const msgAny = params.message as any; + + // Only process user messages with array content (tool results come as user messages) + if (params.message.role !== "user" || !Array.isArray(msgAny.content)) { + return { message: params.message, truncated: false, artifacts: [] }; + } + + const maxChars = computeMaxChars(params.contextWindowTokens, settings); + let changed = false; + const artifacts: TruncatedToolResult[] = []; + const newContent: any[] = []; + + for (const block of msgAny.content) { + if (!block || typeof block !== "object" || block.type !== "tool_result") { + newContent.push(block); + continue; + } + + // Skip image-containing results + if (hasImages(block.content)) { + newContent.push(block); + continue; + } + + const text = extractText(block.content); + + // Check if oversized (respect minKeepChars floor) + const effectiveMax = Math.max(maxChars, settings.minKeepChars); + if (text.length <= effectiveMax) { + newContent.push(block); + continue; + } + + const toolCallId = block.tool_use_id ?? "unknown"; + const toolName = block.name ??
"unknown"; + + // Save original as artifact + const artifactRelPath = params.saveArtifact(toolCallId, text); + + // Truncate the text + const truncatedText = truncateText(text, maxChars, artifactRelPath, settings); + + newContent.push({ + ...block, + content: [{ type: "text", text: truncatedText }], + }); + + artifacts.push({ + toolCallId, + toolName, + originalChars: text.length, + artifactRelPath, + }); + changed = true; + } + + if (!changed) { + return { message: params.message, truncated: false, artifacts: [] }; + } + + return { + message: { ...params.message, content: newContent } as AgentMessage, + truncated: true, + artifacts, + }; +} diff --git a/packages/core/src/agent/session/artifact-integration.test.ts b/packages/core/src/agent/session/artifact-integration.test.ts new file mode 100644 index 00000000..833ef9dd --- /dev/null +++ b/packages/core/src/agent/session/artifact-integration.test.ts @@ -0,0 +1,246 @@ +/** + * E2E Integration Test: Phase 1 — Artifact Storage + Pre-emptive Truncation + * + * Tests the full flow: SessionManager → truncateOversizedToolResults → artifact-store + */ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { mkdirSync, rmSync, existsSync } from "node:fs"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; +import { SessionManager } from "./session-manager.js"; +import { readEntries } from "./storage.js"; +import { readToolResultArtifact } from "./artifact-store.js"; + +const makeTestDir = () => { + const dir = join(tmpdir(), `multica-e2e-p1-${Date.now()}-${Math.random().toString(36).slice(2)}`); + mkdirSync(dir, { recursive: true }); + return dir; +}; + +/** + * Extract text from a tool_result content field, which can be: + * - a string (original format) + * - an array of { type: "text", text: "..."
} (after truncation) + */ +function extractContentText(content: unknown): string { + if (typeof content === "string") return content; + if (Array.isArray(content)) { + return content + .filter((b: any) => b?.type === "text") + .map((b: any) => b.text) + .join(""); + } + return ""; +} + +describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => { + let testDir: string; + const sessionId = "test-session-e2e"; + + beforeEach(() => { + testDir = makeTestDir(); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + it("saves oversized tool result to artifact and truncates in session", async () => { + const sm = new SessionManager({ + sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 100_000, + enableToolResultTruncation: true, + enableToolResultPruning: false, + }); + + // Create an oversized tool result (> 30% of 100k * 4 chars = 120k chars) + const bigContent = "X".repeat(200_000); + const userMessage = { + role: "user" as const, + content: [ + { + type: "tool_result" as const, + tool_use_id: "call_abc123", + content: bigContent, + }, + ], + timestamp: Date.now(), + }; + + sm.saveMessage(userMessage); + await sm.flush(); + + // Verify: session file has truncated content + const entries = readEntries(sessionId, { baseDir: testDir }); + const msgEntries = entries.filter((e) => e.type === "message"); + expect(msgEntries.length).toBe(1); + + const saved = (msgEntries[0] as any).message; + const savedText = extractContentText(saved.content[0].content); + expect(savedText.length).toBeLessThan(bigContent.length); + expect(savedText).toContain("Tool result truncated"); + expect(savedText).toContain("artifacts/"); + + // Verify: artifact file exists with full content + const artifactContent = readToolResultArtifact(sessionId, "call_abc123", { baseDir: testDir }); + expect(artifactContent).toBe(bigContent); + }); + + it("does NOT create artifact for small tool results", async () => { + const sm = 
new SessionManager({ + sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 200_000, + enableToolResultTruncation: true, + enableToolResultPruning: false, + }); + + const smallContent = "Small result data"; + const userMessage = { + role: "user" as const, + content: [ + { + type: "tool_result" as const, + tool_use_id: "call_small", + content: smallContent, + }, + ], + timestamp: Date.now(), + }; + + sm.saveMessage(userMessage); + await sm.flush(); + + // Verify: session file has full content (no truncation) + const entries = readEntries(sessionId, { baseDir: testDir }); + const saved = (entries.find((e) => e.type === "message") as any).message; + const savedText = extractContentText(saved.content[0].content); + expect(savedText).toBe(smallContent); + + // Verify: no artifacts directory created + const artifactsDir = join(testDir, "sessions", sessionId, "artifacts"); + expect(existsSync(artifactsDir)).toBe(false); + }); + + it("truncated message preserves head and tail of original content", async () => { + const sm = new SessionManager({ + sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 50_000, // smaller window → lower threshold + enableToolResultTruncation: true, + enableToolResultPruning: false, + }); + + // Create content with identifiable head and tail + const head = "HEAD_MARKER_" + "A".repeat(10_000); + const middle = "B".repeat(100_000); + const tail = "C".repeat(10_000) + "_TAIL_MARKER"; + const bigContent = head + middle + tail; + + const userMessage = { + role: "user" as const, + content: [ + { + type: "tool_result" as const, + tool_use_id: "call_headtail", + content: bigContent, + }, + ], + timestamp: Date.now(), + }; + + sm.saveMessage(userMessage); + await sm.flush(); + + const entries = readEntries(sessionId, { baseDir: testDir }); + const saved = (entries.find((e) => e.type === "message") as any).message; + const savedText = extractContentText(saved.content[0].content); + + // Head should 
be preserved + expect(savedText).toContain("HEAD_MARKER_"); + // Tail should be preserved + expect(savedText).toContain("_TAIL_MARKER"); + // Middle should be truncated + expect(savedText.length).toBeLessThan(bigContent.length); + }); + + it("handles multiple tool results in same message", async () => { + const sm = new SessionManager({ + sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 50_000, + enableToolResultTruncation: true, + enableToolResultPruning: false, + }); + + const bigContent1 = "RESULT1_" + "X".repeat(200_000); + const smallContent = "small result"; + const bigContent2 = "RESULT2_" + "Y".repeat(200_000); + + const userMessage = { + role: "user" as const, + content: [ + { type: "tool_result" as const, tool_use_id: "call_big1", content: bigContent1 }, + { type: "tool_result" as const, tool_use_id: "call_small", content: smallContent }, + { type: "tool_result" as const, tool_use_id: "call_big2", content: bigContent2 }, + ], + timestamp: Date.now(), + }; + + sm.saveMessage(userMessage); + await sm.flush(); + + const entries = readEntries(sessionId, { baseDir: testDir }); + const saved = (entries.find((e) => e.type === "message") as any).message; + + // Big results should be truncated + const text0 = extractContentText(saved.content[0].content); + const text2 = extractContentText(saved.content[2].content); + expect(text0).toContain("Tool result truncated"); + expect(text2).toContain("Tool result truncated"); + + // Small result should be unchanged + const text1 = extractContentText(saved.content[1].content); + expect(text1).toBe(smallContent); + + // Both artifacts should exist + const art1 = readToolResultArtifact(sessionId, "call_big1", { baseDir: testDir }); + expect(art1).toContain("RESULT1_"); + const art2 = readToolResultArtifact(sessionId, "call_big2", { baseDir: testDir }); + expect(art2).toContain("RESULT2_"); + }); + + it("respects enableToolResultTruncation=false", async () => { + const sm = new SessionManager({ 
+ sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 50_000, + enableToolResultTruncation: false, // Disabled + enableToolResultPruning: false, + }); + + const bigContent = "Z".repeat(200_000); + const userMessage = { + role: "user" as const, + content: [ + { type: "tool_result" as const, tool_use_id: "call_noop", content: bigContent }, + ], + timestamp: Date.now(), + }; + + sm.saveMessage(userMessage); + await sm.flush(); + + const entries = readEntries(sessionId, { baseDir: testDir }); + const saved = (entries.find((e) => e.type === "message") as any).message; + // Should NOT be truncated since feature is disabled + const savedText = extractContentText(saved.content[0].content); + expect(savedText).toBe(bigContent); + }); +}); diff --git a/packages/core/src/agent/session/artifact-store.test.ts b/packages/core/src/agent/session/artifact-store.test.ts new file mode 100644 index 00000000..0e9849b9 --- /dev/null +++ b/packages/core/src/agent/session/artifact-store.test.ts @@ -0,0 +1,104 @@ +import { describe, it, expect, beforeEach, afterEach } from "vitest"; +import { mkdirSync, rmSync, existsSync, readFileSync } from "fs"; +import { join } from "path"; +import { tmpdir } from "os"; +import { + saveToolResultArtifact, + readToolResultArtifact, + resolveArtifactsDir, + resolveArtifactPath, +} from "./artifact-store.js"; + +describe("artifact-store", () => { + const testDir = join(tmpdir(), `multica-artifact-test-${Date.now()}`); + const sessionsDir = join(testDir, "sessions"); + const sessionId = "test-session-001"; + + beforeEach(() => { + mkdirSync(sessionsDir, { recursive: true }); + }); + + afterEach(() => { + rmSync(testDir, { recursive: true, force: true }); + }); + + describe("resolveArtifactsDir", () => { + it("should resolve to artifacts subdirectory", () => { + const dir = resolveArtifactsDir(sessionId, { baseDir: sessionsDir }); + expect(dir).toBe(join(sessionsDir, sessionId, "artifacts")); + }); + }); + + 
describe("resolveArtifactPath", () => { + it("should resolve to a .txt file in the artifacts directory", () => { + const path = resolveArtifactPath(sessionId, "toolu_abc123", { baseDir: sessionsDir }); + expect(path).toBe(join(sessionsDir, sessionId, "artifacts", "toolu_abc123.txt")); + }); + + it("should sanitize unsafe characters in toolCallId", () => { + const path = resolveArtifactPath(sessionId, "tool/../../../etc", { baseDir: sessionsDir }); + expect(path).not.toContain(".."); + expect(path.endsWith(".txt")).toBe(true); + expect(path).toContain("artifacts"); + }); + }); + + describe("saveToolResultArtifact", () => { + it("should save content to a file and return relative path", () => { + const content = "Full stock data for 10 companies..."; + const relPath = saveToolResultArtifact(sessionId, "toolu_001", content, { baseDir: sessionsDir }); + + expect(relPath).toBe("artifacts/toolu_001.txt"); + + const filePath = join(sessionsDir, sessionId, relPath); + expect(existsSync(filePath)).toBe(true); + expect(readFileSync(filePath, "utf8")).toBe(content); + }); + + it("should create artifacts directory if it does not exist", () => { + const artifactsDir = resolveArtifactsDir(sessionId, { baseDir: sessionsDir }); + expect(existsSync(artifactsDir)).toBe(false); + + saveToolResultArtifact(sessionId, "toolu_002", "data", { baseDir: sessionsDir }); + expect(existsSync(artifactsDir)).toBe(true); + }); + + it("should handle multiple artifacts for the same session", () => { + saveToolResultArtifact(sessionId, "toolu_001", "data1", { baseDir: sessionsDir }); + saveToolResultArtifact(sessionId, "toolu_002", "data2", { baseDir: sessionsDir }); + + const data1 = readToolResultArtifact(sessionId, "toolu_001", { baseDir: sessionsDir }); + const data2 = readToolResultArtifact(sessionId, "toolu_002", { baseDir: sessionsDir }); + expect(data1).toBe("data1"); + expect(data2).toBe("data2"); + }); + + it("should overwrite existing artifact with same toolCallId", () => { + 
saveToolResultArtifact(sessionId, "toolu_001", "old data", { baseDir: sessionsDir }); + saveToolResultArtifact(sessionId, "toolu_001", "new data", { baseDir: sessionsDir }); + + const data = readToolResultArtifact(sessionId, "toolu_001", { baseDir: sessionsDir }); + expect(data).toBe("new data"); + }); + }); + + describe("readToolResultArtifact", () => { + it("should return null for non-existent artifact", () => { + const result = readToolResultArtifact(sessionId, "nonexistent", { baseDir: sessionsDir }); + expect(result).toBeNull(); + }); + + it("should return content for existing artifact", () => { + saveToolResultArtifact(sessionId, "toolu_read", "test content", { baseDir: sessionsDir }); + const result = readToolResultArtifact(sessionId, "toolu_read", { baseDir: sessionsDir }); + expect(result).toBe("test content"); + }); + + it("should handle large content", () => { + const largeContent = "x".repeat(500_000); + saveToolResultArtifact(sessionId, "toolu_large", largeContent, { baseDir: sessionsDir }); + const result = readToolResultArtifact(sessionId, "toolu_large", { baseDir: sessionsDir }); + expect(result).toBe(largeContent); + }); + }); +}); diff --git a/packages/core/src/agent/session/artifact-store.ts b/packages/core/src/agent/session/artifact-store.ts new file mode 100644 index 00000000..2cc107e3 --- /dev/null +++ b/packages/core/src/agent/session/artifact-store.ts @@ -0,0 +1,87 @@ +/** + * Artifact Store + * + * Preserves full tool result data when results are truncated for context window + * management. Stored alongside session data so the agent can re-read them. 
+ *
+ * Directory layout:
+ *   ~/.super-multica/sessions/{sessionId}/artifacts/{toolCallId}.txt
+ */
+
+import { join } from "path";
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
+import { resolveSessionDir, type SessionStorageOptions } from "./storage.js";
+
+/**
+ * Resolve the directory that holds a session's artifacts: the `artifacts`
+ * folder inside whatever `resolveSessionDir` returns for the session.
+ */
+export function resolveArtifactsDir(
+  sessionId: string,
+  options?: SessionStorageOptions,
+): string {
+  return join(resolveSessionDir(sessionId, options), "artifacts");
+}
+
+/**
+ * Map a tool call id to its artifact file name.
+ *
+ * Every character outside `[a-zA-Z0-9_-]` is replaced with `_`, so the id
+ * cannot contribute path separators or `..` segments. Ids that differ only in
+ * replaced characters therefore share one file name.
+ *
+ * Single source of truth for save, read and path resolution so the three can
+ * never disagree about where an artifact lives.
+ */
+function artifactFileName(toolCallId: string): string {
+  return `${toolCallId.replace(/[^a-zA-Z0-9_-]/g, "_")}.txt`;
+}
+
+/**
+ * Create the artifacts directory (and any missing parents) if needed.
+ *
+ * @returns The artifacts directory path, so callers need not resolve it again.
+ * @throws Any `mkdirSync` error other than a first-attempt `ENOENT`.
+ */
+function ensureArtifactsDir(
+  sessionId: string,
+  options?: SessionStorageOptions,
+): string {
+  const dir = resolveArtifactsDir(sessionId, options);
+  try {
+    mkdirSync(dir, { recursive: true });
+  } catch (err) {
+    // NOTE(review): the identical call is retried once after ENOENT; presumably
+    // this covers a parent directory vanishing mid-creation — confirm it is
+    // still needed given `recursive: true`.
+    if ((err as NodeJS.ErrnoException).code !== "ENOENT") throw err;
+    mkdirSync(dir, { recursive: true });
+  }
+  return dir;
+}
+
+/**
+ * Save tool result content as an artifact.
+ *
+ * The file is written as UTF-8; an artifact already stored under the same
+ * (sanitized) id is overwritten.
+ *
+ * @returns The relative path from session directory (e.g. "artifacts/{toolCallId}.txt"),
+ *          built from the sanitized id.
+ */
+export function saveToolResultArtifact(
+  sessionId: string,
+  toolCallId: string,
+  content: string,
+  options?: SessionStorageOptions,
+): string {
+  const fileName = artifactFileName(toolCallId);
+  const filePath = join(ensureArtifactsDir(sessionId, options), fileName);
+  writeFileSync(filePath, content, "utf8");
+  return `artifacts/${fileName}`;
+}
+
+/**
+ * Read a tool result artifact by toolCallId.
+ *
+ * @returns The full content, or null if not found.
+ * @throws Any read error other than the file being absent.
+ */
+export function readToolResultArtifact(
+  sessionId: string,
+  toolCallId: string,
+  options?: SessionStorageOptions,
+): string | null {
+  const filePath = resolveArtifactPath(sessionId, toolCallId, options);
+  // Kept so that every path `existsSync` reports as missing still yields null.
+  if (!existsSync(filePath)) return null;
+  try {
+    return readFileSync(filePath, "utf8");
+  } catch (err) {
+    // The artifact can be removed between the existence check and the read;
+    // report that the same way as an artifact that was never written.
+    if ((err as NodeJS.ErrnoException).code === "ENOENT") return null;
+    throw err;
+  }
+}
+
+/**
+ * Resolve the path of the artifact file for a tool call id, inside the
+ * session's artifacts directory. The file is not required to exist.
+ *
+ * NOTE(review): the path is absolute only if `resolveSessionDir` returns an
+ * absolute path — confirm against storage.ts.
+ */
+export function resolveArtifactPath(
+  sessionId: string,
+  toolCallId: string,
+  options?: SessionStorageOptions,
+): string {
+  return join(resolveArtifactsDir(sessionId, options), artifactFileName(toolCallId));
+}
diff --git a/packages/core/src/agent/session/session-manager.ts b/packages/core/src/agent/session/session-manager.ts index 3b901fdd..5bcd31b0 100644 --- a/packages/core/src/agent/session/session-manager.ts +++ b/packages/core/src/agent/session/session-manager.ts @@ -11,6 +11,11 @@ import { pruneToolResults, type ToolResultPruningSettings, } from "../context-window/tool-result-pruning.js"; +import { + truncateOversizedToolResults, + type ToolResultTruncationSettings, +} from "../context-window/tool-result-truncation.js"; +import { saveToolResultArtifact } from "./artifact-store.js"; import type { RunLog } from "../run-log.js"; /** Get Kimi model for summarization (use a cheaper model than k2-thinking) */ @@ -62,6 +67,12 @@ export type SessionManagerOptions = { /** Tool result pruning settings */ toolResultPruning?: Partial | undefined; + // Pre-emptive tool result truncation + /** Whether to enable pre-emptive truncation of oversized tool results (default: true) */ + enableToolResultTruncation?: boolean | undefined; + /** Pre-emptive truncation settings */ + toolResultTruncation?: Partial | undefined; + // Observability /** RunLog instance for structured logging */ runLog?: RunLog | undefined; @@ -85,6 +96,9 @@ export class SessionManager { // Tool result pruning private
readonly enableToolResultPruning: boolean; private readonly toolResultPruning: Partial | undefined; + // Pre-emptive truncation + private readonly enableToolResultTruncation: boolean; + private readonly toolResultTruncation: Partial | undefined; // Observability private readonly runLog: RunLog; @@ -114,6 +128,10 @@ export class SessionManager { this.enableToolResultPruning = options.enableToolResultPruning ?? true; this.toolResultPruning = options.toolResultPruning; + // Pre-emptive truncation (enabled by default) + this.enableToolResultTruncation = options.enableToolResultTruncation ?? true; + this.toolResultTruncation = options.toolResultTruncation; + // Observability this.runLog = options.runLog ?? { log() {}, async flush() {} }; @@ -231,12 +249,36 @@ export class SessionManager { message: AgentMessage, options?: { internal?: boolean; displayContent?: UserMessage["content"]; source?: import("./types.js").MessageSource }, ) { + // Pre-emptive truncation: save oversized tool results as artifacts + // and persist a truncated version in the JSONL session file. + let persistMessage = message; + if (this.enableToolResultTruncation && message.role === "user") { + const result = truncateOversizedToolResults({ + message, + contextWindowTokens: this.contextWindowTokens, + settings: this.toolResultTruncation, + saveArtifact: (toolCallId, content) => + saveToolResultArtifact(this.sessionId, toolCallId, content, { baseDir: this.baseDir }), + }); + if (result.truncated) { + persistMessage = result.message; + for (const art of result.artifacts) { + this.runLog.log("tool_result_truncation", { + tool_call_id: art.toolCallId, + tool_name: art.toolName, + original_chars: art.originalChars, + artifact_path: art.artifactRelPath, + }); + } + } + } + void this.enqueue(() => appendEntry( this.sessionId, { type: "message", - message, + message: persistMessage, timestamp: Date.now(), ...(options?.internal ? { internal: true } : {}), ...(options?.displayContent !== undefined