feat(session): add artifact storage and pre-emptive tool result truncation
Oversized tool results (>30% of context window) are now saved as artifacts before being truncated in the session. The LLM sees a truncated version with head+tail preservation and a marker pointing to the full artifact file, which it can re-read on demand. This prevents information loss during context window management. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
7a16df7c56
commit
3f9a30423d
7 changed files with 1028 additions and 1 deletions
|
|
@ -67,3 +67,14 @@ export {
|
|||
DEFAULT_TOOL_RESULT_PRUNING_SETTINGS,
|
||||
pruneToolResults,
|
||||
} from "./tool-result-pruning.js";
|
||||
|
||||
// Pre-emptive tool result truncation
|
||||
export type {
|
||||
ToolResultTruncationSettings,
|
||||
TruncatedToolResult,
|
||||
TruncationResult,
|
||||
} from "./tool-result-truncation.js";
|
||||
export {
|
||||
DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS,
|
||||
truncateOversizedToolResults,
|
||||
} from "./tool-result-truncation.js";
|
||||
|
|
|
|||
|
|
@ -0,0 +1,320 @@
|
|||
import { describe, it, expect, beforeEach } from "vitest";
|
||||
import {
|
||||
truncateOversizedToolResults,
|
||||
DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS,
|
||||
} from "./tool-result-truncation.js";
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
|
||||
describe("tool-result-truncation", () => {
  describe("DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS", () => {
    it("should have expected defaults", () => {
      const { maxResultContextShare, hardMaxResultChars, minKeepChars, headRatio, tailRatio } =
        DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS;

      expect(maxResultContextShare).toBe(0.3);
      expect(hardMaxResultChars).toBe(400_000);
      expect(minKeepChars).toBe(2_000);
      expect(headRatio).toBe(0.7);
      expect(tailRatio).toBe(0.2);
    });
  });

  describe("truncateOversizedToolResults", () => {
    type TruncateParams = Parameters<typeof truncateOversizedToolResults>[0];

    // Sink that records every saveArtifact call so tests can assert on what was persisted.
    const savedArtifacts: Array<{ toolCallId: string; content: string }> = [];
    const mockSaveArtifact = (toolCallId: string, content: string): string => {
      savedArtifacts.push({ toolCallId, content });
      return `artifacts/${toolCallId}.txt`;
    };

    // Fixture builders: keep each test focused on the content that matters to it.
    const toolResult = (toolUseId: string, name: string, content: unknown) => ({
      type: "tool_result",
      tool_use_id: toolUseId,
      name,
      content,
    });
    const textContent = (text: string) => [{ type: "text", text }];
    const userMessage = (...blocks: unknown[]) =>
      ({ role: "user", content: blocks }) as unknown as AgentMessage;

    // Runs the function under test with the recording saveArtifact mock.
    const truncate = (
      message: AgentMessage,
      contextWindowTokens = 100_000,
      settings?: TruncateParams["settings"],
    ) =>
      truncateOversizedToolResults({
        message,
        contextWindowTokens,
        ...(settings ? { settings } : {}),
        saveArtifact: mockSaveArtifact,
      });

    beforeEach(() => {
      savedArtifacts.length = 0;
    });

    it("should not truncate assistant messages", () => {
      const message = {
        role: "assistant",
        content: "x".repeat(500_000),
      } as AgentMessage;

      const result = truncate(message);

      expect(result.truncated).toBe(false);
      expect(result.artifacts).toHaveLength(0);
      expect(savedArtifacts).toHaveLength(0);
    });

    it("should not truncate plain user text messages", () => {
      const message = {
        role: "user",
        content: "Hello, world!",
      } as AgentMessage;

      const result = truncate(message);

      expect(result.truncated).toBe(false);
    });

    it("should not truncate small tool results", () => {
      const message = userMessage(toolResult("toolu_001", "read", textContent("small result")));

      const result = truncate(message);

      expect(result.truncated).toBe(false);
      expect(savedArtifacts).toHaveLength(0);
    });

    it("should truncate oversized tool results", () => {
      // 100k tokens * 4 chars/token * 0.3 share = 120,000 char max
      const largeContent = "x".repeat(200_000);
      const message = userMessage(toolResult("toolu_large", "exec", textContent(largeContent)));

      const result = truncate(message);

      expect(result.truncated).toBe(true);
      expect(result.artifacts).toHaveLength(1);

      const artifact = result.artifacts[0]!;
      expect(artifact.toolCallId).toBe("toolu_large");
      expect(artifact.toolName).toBe("exec");
      expect(artifact.originalChars).toBe(200_000);
      expect(artifact.artifactRelPath).toBe("artifacts/toolu_large.txt");

      // The full, untruncated text must have been handed to saveArtifact.
      expect(savedArtifacts).toHaveLength(1);
      expect(savedArtifacts[0]!.content).toBe(largeContent);

      // The message kept in the session is shorter and points at the artifact.
      const truncatedBlock = (result.message as any).content[0];
      const truncatedText = truncatedBlock.content[0].text;
      expect(truncatedText.length).toBeLessThan(200_000);
      expect(truncatedText).toContain("[Tool result truncated:");
      expect(truncatedText).toContain("artifacts/toolu_large.txt");
      expect(truncatedText).toContain("read tool");
    });

    it("should skip image-containing tool results", () => {
      const message = userMessage(
        toolResult("toolu_img", "browser", [
          { type: "text", text: "x".repeat(500_000) },
          { type: "image", data: "base64data..." },
        ]),
      );

      const result = truncate(message);

      expect(result.truncated).toBe(false);
    });

    it("should respect hardMaxResultChars", () => {
      // Very generous context share, but a strict hard max
      const largeContent = "x".repeat(10_000);
      const message = userMessage(toolResult("toolu_hard", "exec", textContent(largeContent)));

      const result = truncate(message, 1_000_000, { hardMaxResultChars: 5_000 });

      expect(result.truncated).toBe(true);
    });

    it("should handle multiple tool results in one message", () => {
      const message = userMessage(
        toolResult("toolu_small", "read", textContent("small")),
        toolResult("toolu_big", "exec", textContent("y".repeat(200_000))),
        { type: "text", text: "some user text" },
      );

      const result = truncate(message);

      expect(result.truncated).toBe(true);
      expect(result.artifacts).toHaveLength(1);
      expect(result.artifacts[0]!.toolCallId).toBe("toolu_big");

      // Blocks that were not oversized tool results pass through unchanged
      const blocks = (result.message as any).content;
      expect(blocks[0].content[0].text).toBe("small"); // small tool result unchanged
      expect(blocks[2].text).toBe("some user text"); // text block unchanged
    });

    it("should handle string content in tool results", () => {
      const largeContent = "z".repeat(200_000);
      const message = userMessage(toolResult("toolu_str", "exec", largeContent));

      const result = truncate(message);

      expect(result.truncated).toBe(true);
      expect(savedArtifacts[0]!.content).toBe(largeContent);
    });

    it("should preserve head and tail of truncated content", () => {
      // Content with an identifiable head and tail around a disposable middle
      const head = "HEAD_" + "a".repeat(50_000);
      const middle = "b".repeat(100_000);
      const tail = "c".repeat(50_000) + "_TAIL";
      const content = head + middle + tail;
      const message = userMessage(toolResult("toolu_headtail", "exec", textContent(content)));

      const result = truncate(message, 100_000); // 120k char max

      expect(result.truncated).toBe(true);
      const truncatedText = (result.message as any).content[0].content[0].text;
      expect(truncatedText).toContain("HEAD_");
      expect(truncatedText).toContain("_TAIL");
    });

    it("should use custom settings", () => {
      const content = "x".repeat(5_000);
      const message = userMessage(toolResult("toolu_custom", "exec", textContent(content)));

      // Small context with tight settings should trigger truncation:
      // 1000 tokens * 4 * 0.3 = 1200 char max
      const result = truncate(message, 1_000, { minKeepChars: 500 });

      expect(result.truncated).toBe(true);
    });

    it("should not truncate when content fits within minKeepChars", () => {
      const content = "x".repeat(1_500);
      const message = userMessage(toolResult("toolu_min", "exec", textContent(content)));

      // Even with a very small context, minKeepChars (2000) > content (1500)
      const result = truncate(message, 100);

      expect(result.truncated).toBe(false);
    });
  });
});
|
||||
217
packages/core/src/agent/context-window/tool-result-truncation.ts
Normal file
217
packages/core/src/agent/context-window/tool-result-truncation.ts
Normal file
|
|
@ -0,0 +1,217 @@
|
|||
/**
|
||||
* Pre-emptive Tool Result Truncation
|
||||
*
|
||||
* Truncates oversized tool results BEFORE they are persisted to the session file.
|
||||
* The original full content is saved as an artifact so the agent can re-read it.
|
||||
*
|
||||
* This differs from tool-result-pruning.ts which operates AFTER persistence
|
||||
* during post-turn compaction. Pre-emptive truncation ensures:
|
||||
* 1. Session files don't grow unbounded
|
||||
* 2. Truncation markers tell the LLM where to find the full data
|
||||
* 3. The agent can use the read tool to access full artifacts when needed
|
||||
*/
|
||||
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
|
||||
// ─── Settings ─────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Tunables that decide when a tool result is oversized and how much of it is kept. */
export type ToolResultTruncationSettings = {
  /** Largest share of the context window one tool result may take (default: 0.3). */
  maxResultContextShare: number;
  /** Absolute ceiling in characters, applied on top of the context share (default: 400_000). */
  hardMaxResultChars: number;
  /** Floor in characters: results at or below this size are never truncated (default: 2_000). */
  minKeepChars: number;
  /** Share of the kept budget taken from the start of the text (default: 0.7). */
  headRatio: number;
  /** Share of the kept budget taken from the end of the text (default: 0.2). */
  tailRatio: number;
};

/** Baseline settings; callers override individual fields via `Partial<ToolResultTruncationSettings>`. */
export const DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS: ToolResultTruncationSettings = {
  maxResultContextShare: 0.3,
  hardMaxResultChars: 400_000,
  minKeepChars: 2_000,
  headRatio: 0.7,
  tailRatio: 0.2,
};

/** Assumed characters per token, used to turn a token-sized context window into a character budget. */
const CHARS_PER_TOKEN = 4;
|
||||
|
||||
// ─── Types ────────────────────────────────────────────────────────────────────
|
||||
|
||||
/** Record of one tool result whose full text was moved out to an artifact. */
export type TruncatedToolResult = {
  /** Id of the tool call the result belongs to ("unknown" when the block carried none). */
  toolCallId: string;
  /** Name of the tool that produced the result ("unknown" when the block carried none). */
  toolName: string;
  /** Length of the extracted text before truncation, in characters. */
  originalChars: number;
  /** Relative path returned by the artifact-saving callback. */
  artifactRelPath: string;
};

/** Outcome of running truncation over a single message. */
export type TruncationResult = {
  /** The input message, or a copy with oversized tool results replaced. */
  message: AgentMessage;
  /** True when at least one tool result was truncated. */
  truncated: boolean;
  /** One entry per truncated tool result; empty when nothing changed. */
  artifacts: TruncatedToolResult[];
};
|
||||
|
||||
// ─── Core Logic ───────────────────────────────────────────────────────────────
|
||||
|
||||
/**
 * Compute the character budget for a single tool result.
 *
 * The budget is the configured share of the context window (converted from
 * tokens to characters), capped by `settings.hardMaxResultChars`.
 *
 * @param contextWindowTokens - Size of the model context window, in tokens.
 * @param settings - Fully resolved truncation settings.
 * @returns The maximum number of characters one tool result may occupy.
 */
function computeMaxChars(
  contextWindowTokens: number,
  settings: ToolResultTruncationSettings,
): number {
  const contextShare = contextWindowTokens * CHARS_PER_TOKEN * settings.maxResultContextShare;

  // Math.min returns NaN if any argument is NaN, and every later
  // `length <= budget` comparison against NaN is false. Without this guard an
  // unknown context window would mark every tool result as oversized, so fall
  // back to the hard cap instead.
  if (Number.isNaN(contextShare)) {
    return settings.hardMaxResultChars;
  }

  return Math.min(contextShare, settings.hardMaxResultChars);
}
|
||||
|
||||
/**
 * Flatten a tool result `content` field into plain text.
 *
 * A string is returned as-is. For an array, string entries and the `text` of
 * object entries (when it is a string) are collected in order and joined with
 * newlines; every other entry is ignored. Any other input yields "".
 */
function extractText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const pieces = content.flatMap((entry: unknown): string[] => {
    if (typeof entry === "string") return [entry];
    if (entry && typeof entry === "object" && "text" in entry) {
      const candidate = (entry as { text: unknown }).text;
      if (typeof candidate === "string") return [candidate];
    }
    return [];
  });

  return pieces.join("\n");
}
|
||||
|
||||
/**
 * Shorten `text` by keeping a head slice and a tail slice, with a marker in
 * between that tells the reader where the full content was saved.
 *
 * The kept budget is `max(settings.minKeepChars, maxChars)`; text at or below
 * it is returned unchanged. The head gets `headRatio` of the budget (pulled
 * back to a preceding newline when one lies in the last 20% of the head) and
 * the tail gets `tailRatio`.
 *
 * @param text - Full tool result text.
 * @param maxChars - Character budget computed for this tool result.
 * @param artifactRelPath - Path embedded in the marker so the full text can be found.
 * @param settings - Fully resolved truncation settings.
 * @returns The original text, or `head + marker + tail`.
 */
function truncateText(
  text: string,
  maxChars: number,
  artifactRelPath: string,
  settings: ToolResultTruncationSettings,
): string {
  const keepChars = Math.max(settings.minKeepChars, maxChars);
  // Negated ">" rather than "<=" so a NaN budget leaves the text untouched
  // instead of falling into the slicing code with NaN indexes.
  if (!(text.length > keepChars)) return text;

  // Clamp both slice sizes: a negative ratio would otherwise become a negative
  // slice index (keeping almost the whole text), and ratios summing to more
  // than 1 would make head and tail overlap and duplicate content.
  const budget = Math.max(0, Math.floor(keepChars));
  const clamp = (value: number, upper: number): number =>
    Number.isNaN(value) ? 0 : Math.min(Math.max(value, 0), upper);
  const headChars = clamp(Math.floor(keepChars * settings.headRatio), budget);
  const tailChars = clamp(Math.floor(keepChars * settings.tailRatio), budget - headChars);

  // Try to break at a newline boundary for the head
  let headEnd = headChars;
  const lastNewline = text.lastIndexOf("\n", headChars);
  if (lastNewline > headChars * 0.8) {
    headEnd = lastNewline;
  }

  // Strings are indexed by UTF-16 code unit, so a cut can land between the two
  // halves of a surrogate pair. Nudge each cut outward so neither slice ends
  // or starts with a lone surrogate. charCodeAt returns NaN out of range,
  // which simply fails both range checks.
  const isHighSurrogate = (code: number): boolean => code >= 0xd800 && code <= 0xdbff;
  const isLowSurrogate = (code: number): boolean => code >= 0xdc00 && code <= 0xdfff;
  const splitsPair = (index: number): boolean =>
    isHighSurrogate(text.charCodeAt(index - 1)) && isLowSurrogate(text.charCodeAt(index));

  if (splitsPair(headEnd)) headEnd -= 1;
  let tailStart = text.length - tailChars;
  if (splitsPair(tailStart)) tailStart += 1;

  const head = text.slice(0, headEnd);
  const tail = text.slice(tailStart);

  const marker =
    `\n\n[Tool result truncated: original ${text.length} chars. ` +
    `Full result saved to ${artifactRelPath}. ` +
    `Use the read tool to access the complete data if needed.]\n\n`;

  return head + marker + tail;
}
|
||||
|
||||
/**
 * Report whether a tool result `content` array holds at least one image block,
 * i.e. an object entry whose `type` is "image". Non-array content never does.
 */
function hasImages(content: unknown): boolean {
  if (!Array.isArray(content)) return false;

  for (const entry of content) {
    if (entry && typeof entry === "object" && entry.type === "image") {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
/**
 * Replace oversized tool results in a single message with a truncated
 * head + marker + tail version.
 *
 * Only messages with role "user" and array content are inspected (tool results
 * come as user messages). Within them, `tool_result` blocks that contain no
 * image and whose extracted text exceeds the budget are rewritten. Every other
 * block is passed through untouched.
 *
 * This function does no file I/O itself: persisting the full text is delegated
 * to `params.saveArtifact`, which is invoked once per truncated tool result,
 * before the truncated text is built.
 *
 * @param params.message - Message to inspect; never mutated.
 * @param params.contextWindowTokens - Context window size used to derive the budget.
 * @param params.settings - Optional overrides merged over the defaults.
 * @param params.saveArtifact - Callback that stores the original text and
 *   returns the relative artifact path to embed in the marker.
 * @returns The original message when nothing changed, otherwise a shallow copy
 *   with new content plus one artifact record per truncated tool result.
 */
export function truncateOversizedToolResults(params: {
  message: AgentMessage;
  contextWindowTokens: number;
  settings?: Partial<ToolResultTruncationSettings>;
  /** Called to save original content. Must return the relative artifact path. */
  saveArtifact: (toolCallId: string, content: string) => string;
}): TruncationResult {
  const settings: ToolResultTruncationSettings = {
    ...DEFAULT_TOOL_RESULT_TRUNCATION_SETTINGS,
    ...params.settings,
  };
  const original = params.message as any;

  // Anything that is not a user message with block content is out of scope.
  if (!(params.message.role === "user" && Array.isArray(original.content))) {
    return { message: params.message, truncated: false, artifacts: [] };
  }

  const maxChars = computeMaxChars(params.contextWindowTokens, settings);
  // Results at or below minKeepChars are never touched, whatever the budget.
  const threshold = Math.max(maxChars, settings.minKeepChars);
  const artifacts: TruncatedToolResult[] = [];

  // Returns the block itself when it should stay as-is, or a truncated copy.
  const rewriteBlock = (block: any): any => {
    const isToolResult = block && typeof block === "object" && block.type === "tool_result";
    if (!isToolResult || hasImages(block.content)) return block;

    const fullText = extractText(block.content);
    if (fullText.length <= threshold) return block;

    const toolCallId = block.tool_use_id ?? "unknown";
    const toolName = block.name ?? "unknown";

    // The path is needed for the marker, so the artifact is saved first.
    const artifactRelPath = params.saveArtifact(toolCallId, fullText);
    artifacts.push({
      toolCallId,
      toolName,
      originalChars: fullText.length,
      artifactRelPath,
    });

    return {
      ...block,
      content: [
        { type: "text", text: truncateText(fullText, maxChars, artifactRelPath, settings) },
      ],
    };
  };

  const rewritten: any[] = [];
  for (const block of original.content) {
    rewritten.push(rewriteBlock(block));
  }

  if (artifacts.length === 0) {
    return { message: params.message, truncated: false, artifacts: [] };
  }

  return {
    message: { ...params.message, content: rewritten } as AgentMessage,
    truncated: true,
    artifacts,
  };
}
|
||||
246
packages/core/src/agent/session/artifact-integration.test.ts
Normal file
246
packages/core/src/agent/session/artifact-integration.test.ts
Normal file
|
|
@ -0,0 +1,246 @@
|
|||
/**
|
||||
* E2E Integration Test: Phase 1 — Artifact Storage + Pre-emptive Truncation
|
||||
*
|
||||
* Tests the full flow: SessionManager → truncateOversizedToolResults → artifact-store
|
||||
*/
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { mkdirSync, rmSync, existsSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
import { SessionManager } from "./session-manager.js";
|
||||
import { readEntries } from "./storage.js";
|
||||
import { readToolResultArtifact } from "./artifact-store.js";
|
||||
|
||||
/** Creates a uniquely named scratch directory under the OS temp dir and returns its path. */
const makeTestDir = () => {
  const uniqueSuffix = `${Date.now()}-${Math.random().toString(36).slice(2)}`;
  const dir = join(tmpdir(), "multica-e2e-p1-" + uniqueSuffix);
  mkdirSync(dir, { recursive: true });
  return dir;
};
|
||||
|
||||
/**
 * Read the text out of a tool_result `content` field in either stored shape:
 * - a plain string (the original format), returned as-is
 * - an array of blocks (the format after truncation), where the `text` of
 *   every `type: "text"` block is concatenated with no separator
 * Anything else yields "".
 */
function extractContentText(content: unknown): string {
  if (typeof content === "string") return content;
  if (!Array.isArray(content)) return "";

  const texts: unknown[] = [];
  for (const block of content as any[]) {
    if (block?.type === "text") {
      texts.push(block.text);
    }
  }
  return texts.join("");
}
|
||||
|
||||
describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
  let testDir: string;
  const sessionId = "test-session-e2e";

  // Every test uses the same manager configuration apart from the window size
  // and the truncation toggle.
  const createManager = (contextWindowTokens: number, enableToolResultTruncation = true) =>
    new SessionManager({
      sessionId,
      baseDir: testDir,
      compactionMode: "tokens",
      contextWindowTokens,
      enableToolResultTruncation,
      enableToolResultPruning: false,
    });

  const toolResult = (toolUseId: string, content: string) => ({
    type: "tool_result" as const,
    tool_use_id: toolUseId,
    content,
  });

  const userMessage = (...content: Array<ReturnType<typeof toolResult>>) => ({
    role: "user" as const,
    content,
    timestamp: Date.now(),
  });

  /** Returns the first message persisted to the session file. */
  const readSavedMessage = (): any =>
    (readEntries(sessionId, { baseDir: testDir }).find((e) => e.type === "message") as any).message;

  beforeEach(() => {
    testDir = makeTestDir();
  });

  afterEach(() => {
    rmSync(testDir, { recursive: true, force: true });
  });

  it("saves oversized tool result to artifact and truncates in session", async () => {
    const sm = createManager(100_000);

    // Oversized: more than 30% of 100k tokens * 4 chars = 120k chars
    const bigContent = "X".repeat(200_000);

    sm.saveMessage(userMessage(toolResult("call_abc123", bigContent)));
    await sm.flush();

    // Verify: session file has exactly one message, with truncated content
    const entries = readEntries(sessionId, { baseDir: testDir });
    const msgEntries = entries.filter((e) => e.type === "message");
    expect(msgEntries.length).toBe(1);

    const saved = (msgEntries[0] as any).message;
    const savedText = extractContentText(saved.content[0].content);
    expect(savedText.length).toBeLessThan(bigContent.length);
    expect(savedText).toContain("Tool result truncated");
    expect(savedText).toContain("artifacts/");

    // Verify: artifact file exists with full content
    const artifactContent = readToolResultArtifact(sessionId, "call_abc123", { baseDir: testDir });
    expect(artifactContent).toBe(bigContent);
  });

  it("does NOT create artifact for small tool results", async () => {
    const sm = createManager(200_000);

    const smallContent = "Small result data";

    sm.saveMessage(userMessage(toolResult("call_small", smallContent)));
    await sm.flush();

    // Verify: session file has full content (no truncation)
    const saved = readSavedMessage();
    const savedText = extractContentText(saved.content[0].content);
    expect(savedText).toBe(smallContent);

    // Verify: no artifact was written. Artifacts resolve to
    // <baseDir>/<sessionId>/artifacts (the same location readToolResultArtifact
    // reads from in the other tests), so check that path. The previous
    // <baseDir>/sessions/<sessionId>/artifacts path could never exist, which
    // made this assertion pass unconditionally.
    expect(readToolResultArtifact(sessionId, "call_small", { baseDir: testDir })).toBeNull();
    const artifactsDir = join(testDir, sessionId, "artifacts");
    expect(existsSync(artifactsDir)).toBe(false);
  });

  it("truncated message preserves head and tail of original content", async () => {
    const sm = createManager(50_000); // smaller window → lower threshold

    // Content with an identifiable head and tail
    const head = "HEAD_MARKER_" + "A".repeat(10_000);
    const middle = "B".repeat(100_000);
    const tail = "C".repeat(10_000) + "_TAIL_MARKER";
    const bigContent = head + middle + tail;

    sm.saveMessage(userMessage(toolResult("call_headtail", bigContent)));
    await sm.flush();

    const saved = readSavedMessage();
    const savedText = extractContentText(saved.content[0].content);

    // Head should be preserved
    expect(savedText).toContain("HEAD_MARKER_");
    // Tail should be preserved
    expect(savedText).toContain("_TAIL_MARKER");
    // Middle should be truncated
    expect(savedText.length).toBeLessThan(bigContent.length);
  });

  it("handles multiple tool results in same message", async () => {
    const sm = createManager(50_000);

    const bigContent1 = "RESULT1_" + "X".repeat(200_000);
    const smallContent = "small result";
    const bigContent2 = "RESULT2_" + "Y".repeat(200_000);

    sm.saveMessage(
      userMessage(
        toolResult("call_big1", bigContent1),
        toolResult("call_small", smallContent),
        toolResult("call_big2", bigContent2),
      ),
    );
    await sm.flush();

    const saved = readSavedMessage();

    // Big results should be truncated
    const text0 = extractContentText(saved.content[0].content);
    const text2 = extractContentText(saved.content[2].content);
    expect(text0).toContain("Tool result truncated");
    expect(text2).toContain("Tool result truncated");

    // Small result should be unchanged
    const text1 = extractContentText(saved.content[1].content);
    expect(text1).toBe(smallContent);

    // Both artifacts should exist
    const art1 = readToolResultArtifact(sessionId, "call_big1", { baseDir: testDir });
    expect(art1).toContain("RESULT1_");
    const art2 = readToolResultArtifact(sessionId, "call_big2", { baseDir: testDir });
    expect(art2).toContain("RESULT2_");
  });

  it("respects enableToolResultTruncation=false", async () => {
    const sm = createManager(50_000, false); // truncation disabled

    const bigContent = "Z".repeat(200_000);

    sm.saveMessage(userMessage(toolResult("call_noop", bigContent)));
    await sm.flush();

    // Should NOT be truncated since feature is disabled
    const saved = readSavedMessage();
    const savedText = extractContentText(saved.content[0].content);
    expect(savedText).toBe(bigContent);
  });
});
|
||||
104
packages/core/src/agent/session/artifact-store.test.ts
Normal file
104
packages/core/src/agent/session/artifact-store.test.ts
Normal file
|
|
@ -0,0 +1,104 @@
|
|||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { mkdirSync, rmSync, existsSync, readFileSync } from "fs";
|
||||
import { join } from "path";
|
||||
import { tmpdir } from "os";
|
||||
import {
|
||||
saveToolResultArtifact,
|
||||
readToolResultArtifact,
|
||||
resolveArtifactsDir,
|
||||
resolveArtifactPath,
|
||||
} from "./artifact-store.js";
|
||||
|
||||
describe("artifact-store", () => {
|
||||
const testDir = join(tmpdir(), `multica-artifact-test-${Date.now()}`);
|
||||
const sessionsDir = join(testDir, "sessions");
|
||||
const sessionId = "test-session-001";
|
||||
|
||||
beforeEach(() => {
|
||||
mkdirSync(sessionsDir, { recursive: true });
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
describe("resolveArtifactsDir", () => {
|
||||
it("should resolve to artifacts subdirectory", () => {
|
||||
const dir = resolveArtifactsDir(sessionId, { baseDir: sessionsDir });
|
||||
expect(dir).toBe(join(sessionsDir, sessionId, "artifacts"));
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveArtifactPath", () => {
|
||||
it("should resolve to a .txt file in the artifacts directory", () => {
|
||||
const path = resolveArtifactPath(sessionId, "toolu_abc123", { baseDir: sessionsDir });
|
||||
expect(path).toBe(join(sessionsDir, sessionId, "artifacts", "toolu_abc123.txt"));
|
||||
});
|
||||
|
||||
it("should sanitize unsafe characters in toolCallId", () => {
|
||||
const path = resolveArtifactPath(sessionId, "tool/../../../etc", { baseDir: sessionsDir });
|
||||
expect(path).not.toContain("..");
|
||||
expect(path.endsWith(".txt")).toBe(true);
|
||||
expect(path).toContain("artifacts");
|
||||
});
|
||||
});
|
||||
|
||||
describe("saveToolResultArtifact", () => {
  // Built lazily: `sessionsDir` is owned by the enclosing suite and must be
  // read at test time, not at suite-definition time.
  const storage = () => ({ baseDir: sessionsDir });

  it("should save content to a file and return relative path", () => {
    const payload = "Full stock data for 10 companies...";

    const relativePath = saveToolResultArtifact(sessionId, "toolu_001", payload, storage());
    expect(relativePath).toBe("artifacts/toolu_001.txt");

    // The returned path is relative to the session directory.
    const absolutePath = join(sessionsDir, sessionId, relativePath);
    expect(existsSync(absolutePath)).toBe(true);
    expect(readFileSync(absolutePath, "utf8")).toBe(payload);
  });

  it("should create artifacts directory if it does not exist", () => {
    const expectedDir = resolveArtifactsDir(sessionId, storage());
    expect(existsSync(expectedDir)).toBe(false);

    saveToolResultArtifact(sessionId, "toolu_002", "data", storage());

    expect(existsSync(expectedDir)).toBe(true);
  });

  it("should handle multiple artifacts for the same session", () => {
    saveToolResultArtifact(sessionId, "toolu_001", "data1", storage());
    saveToolResultArtifact(sessionId, "toolu_002", "data2", storage());

    const first = readToolResultArtifact(sessionId, "toolu_001", storage());
    const second = readToolResultArtifact(sessionId, "toolu_002", storage());

    expect(first).toBe("data1");
    expect(second).toBe("data2");
  });

  it("should overwrite existing artifact with same toolCallId", () => {
    // Two writes under the same id: the second one must win.
    saveToolResultArtifact(sessionId, "toolu_001", "old data", storage());
    saveToolResultArtifact(sessionId, "toolu_001", "new data", storage());

    const stored = readToolResultArtifact(sessionId, "toolu_001", storage());
    expect(stored).toBe("new data");
  });
});
|
||||
|
||||
describe("readToolResultArtifact", () => {
  // Built lazily: `sessionsDir` is owned by the enclosing suite and must be
  // read at test time, not at suite-definition time.
  const storage = () => ({ baseDir: sessionsDir });

  it("should return null for non-existent artifact", () => {
    const missing = readToolResultArtifact(sessionId, "nonexistent", storage());
    expect(missing).toBeNull();
  });

  it("should return content for existing artifact", () => {
    saveToolResultArtifact(sessionId, "toolu_read", "test content", storage());

    const loaded = readToolResultArtifact(sessionId, "toolu_read", storage());
    expect(loaded).toBe("test content");
  });

  it("should handle large content", () => {
    // 500k characters must round-trip unchanged.
    const bigPayload = "x".repeat(500_000);
    saveToolResultArtifact(sessionId, "toolu_large", bigPayload, storage());

    const loaded = readToolResultArtifact(sessionId, "toolu_large", storage());
    expect(loaded).toBe(bigPayload);
  });
});
|
||||
});
|
||||
87
packages/core/src/agent/session/artifact-store.ts
Normal file
87
packages/core/src/agent/session/artifact-store.ts
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
/**
 * Artifact Store
 *
 * Preserves the full tool result data when results are truncated for context
 * window management. Artifacts are stored alongside the session data so the
 * agent can re-read them.
 *
 * Directory layout:
 *   ~/.super-multica/sessions/{sessionId}/artifacts/{toolCallId}.txt
 *
 * The {toolCallId} part of the file name is sanitized: every character outside
 * [a-zA-Z0-9_-] is replaced with "_".
 */
|
||||
|
||||
import { join } from "path";
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
||||
import { resolveSessionDir, type SessionStorageOptions } from "./storage.js";
|
||||
|
||||
/**
 * Resolve the directory that holds a session's artifacts.
 *
 * @param sessionId - Session whose artifacts directory is wanted.
 * @param options - Storage options forwarded unchanged to `resolveSessionDir`.
 * @returns The session directory joined with the `artifacts` sub-directory.
 *          The directory is not created by this call.
 */
export function resolveArtifactsDir(
  sessionId: string,
  options?: SessionStorageOptions,
): string {
  const sessionDir = resolveSessionDir(sessionId, options);
  return join(sessionDir, "artifacts");
}
|
||||
|
||||
/**
 * Make sure the artifacts directory for a session exists, creating it (and any
 * missing parents) when necessary.
 *
 * An `ENOENT` failure is retried exactly once; any other error, or a failure
 * of the retry itself, propagates to the caller.
 */
function ensureArtifactsDir(
  sessionId: string,
  options?: SessionStorageOptions,
): void {
  const target = resolveArtifactsDir(sessionId, options);
  try {
    mkdirSync(target, { recursive: true });
  } catch (err) {
    const code = (err as NodeJS.ErrnoException).code;
    if (code !== "ENOENT") {
      throw err;
    }
    // Single retry on ENOENT.
    mkdirSync(target, { recursive: true });
  }
}
|
||||
|
||||
/**
 * Persist the full content of a tool result as an artifact file.
 *
 * The artifacts directory is created on demand. An existing artifact with the
 * same (sanitized) tool call id is overwritten.
 *
 * @param sessionId - Session the artifact belongs to.
 * @param toolCallId - Tool call identifier; used as the file name after
 *                     replacing every character outside `[a-zA-Z0-9_-]` with `_`.
 * @param content - Text to write, encoded as UTF-8.
 * @param options - Storage options forwarded to the directory resolver.
 * @returns The path relative to the session directory
 *          (e.g. "artifacts/{toolCallId}.txt").
 */
export function saveToolResultArtifact(
  sessionId: string,
  toolCallId: string,
  content: string,
  options?: SessionStorageOptions,
): string {
  ensureArtifactsDir(sessionId, options);

  // Anything that is not alphanumeric, "_" or "-" becomes "_" so the id
  // cannot introduce path separators or other special characters.
  const sanitizedId = toolCallId.replace(/[^a-zA-Z0-9_-]/g, "_");
  const artifactName = `${sanitizedId}.txt`;

  const artifactsDir = resolveArtifactsDir(sessionId, options);
  const destination = join(artifactsDir, artifactName);
  writeFileSync(destination, content, "utf8");

  return `artifacts/${artifactName}`;
}
|
||||
|
||||
/**
 * Read a tool result artifact by toolCallId.
 *
 * The file is read directly instead of being probed with `existsSync` first,
 * so an artifact that disappears between a check and the read still yields
 * `null` rather than an unexpected exception.
 *
 * @param sessionId - Session the artifact belongs to.
 * @param toolCallId - Tool call identifier; sanitized the same way as when
 *                     saving (characters outside `[a-zA-Z0-9_-]` become `_`).
 * @param options - Storage options forwarded to the directory resolver.
 * @returns The full content, or null if not found.
 * @throws Any filesystem error other than "not found" (e.g. permission errors
 *         while reading an existing file).
 */
export function readToolResultArtifact(
  sessionId: string,
  toolCallId: string,
  options?: SessionStorageOptions,
): string | null {
  const safeId = toolCallId.replace(/[^a-zA-Z0-9_-]/g, "_");
  const filePath = join(
    resolveArtifactsDir(sessionId, options),
    `${safeId}.txt`,
  );
  try {
    return readFileSync(filePath, "utf8");
  } catch (err) {
    const code = (err as NodeJS.ErrnoException).code;
    // ENOENT: the file or a parent directory is missing.
    // ENOTDIR: a path component is not a directory.
    // Both mean "no such artifact", which the previous existence check
    // also reported as null.
    if (code === "ENOENT" || code === "ENOTDIR") return null;
    throw err;
  }
}
|
||||
|
||||
/**
 * Compute the absolute path of the artifact file for a tool call.
 *
 * Only builds the path; it does not check that the file exists.
 *
 * @param sessionId - Session the artifact belongs to.
 * @param toolCallId - Tool call identifier; every character outside
 *                     `[a-zA-Z0-9_-]` is replaced with `_` before use.
 * @param options - Storage options forwarded to the directory resolver.
 * @returns `{artifactsDir}/{sanitizedToolCallId}.txt`.
 */
export function resolveArtifactPath(
  sessionId: string,
  toolCallId: string,
  options?: SessionStorageOptions,
): string {
  const sanitizedId = toolCallId.replace(/[^a-zA-Z0-9_-]/g, "_");
  const artifactsDir = resolveArtifactsDir(sessionId, options);
  const artifactName = `${sanitizedId}.txt`;
  return join(artifactsDir, artifactName);
}
|
||||
|
|
@ -11,6 +11,11 @@ import {
|
|||
pruneToolResults,
|
||||
type ToolResultPruningSettings,
|
||||
} from "../context-window/tool-result-pruning.js";
|
||||
import {
|
||||
truncateOversizedToolResults,
|
||||
type ToolResultTruncationSettings,
|
||||
} from "../context-window/tool-result-truncation.js";
|
||||
import { saveToolResultArtifact } from "./artifact-store.js";
|
||||
import type { RunLog } from "../run-log.js";
|
||||
|
||||
/** Get Kimi model for summarization (use a cheaper model than k2-thinking) */
|
||||
|
|
@ -62,6 +67,12 @@ export type SessionManagerOptions = {
|
|||
/** Tool result pruning settings */
|
||||
toolResultPruning?: Partial<ToolResultPruningSettings> | undefined;
|
||||
|
||||
// Pre-emptive tool result truncation
|
||||
/** Whether to enable pre-emptive truncation of oversized tool results (default: true) */
|
||||
enableToolResultTruncation?: boolean | undefined;
|
||||
/** Pre-emptive truncation settings */
|
||||
toolResultTruncation?: Partial<ToolResultTruncationSettings> | undefined;
|
||||
|
||||
// Observability
|
||||
/** RunLog instance for structured logging */
|
||||
runLog?: RunLog | undefined;
|
||||
|
|
@ -85,6 +96,9 @@ export class SessionManager {
|
|||
// Tool result pruning
|
||||
private readonly enableToolResultPruning: boolean;
|
||||
private readonly toolResultPruning: Partial<ToolResultPruningSettings> | undefined;
|
||||
// Pre-emptive truncation
|
||||
private readonly enableToolResultTruncation: boolean;
|
||||
private readonly toolResultTruncation: Partial<ToolResultTruncationSettings> | undefined;
|
||||
// Observability
|
||||
private readonly runLog: RunLog;
|
||||
|
||||
|
|
@ -114,6 +128,10 @@ export class SessionManager {
|
|||
this.enableToolResultPruning = options.enableToolResultPruning ?? true;
|
||||
this.toolResultPruning = options.toolResultPruning;
|
||||
|
||||
// Pre-emptive truncation (enabled by default)
|
||||
this.enableToolResultTruncation = options.enableToolResultTruncation ?? true;
|
||||
this.toolResultTruncation = options.toolResultTruncation;
|
||||
|
||||
// Observability
|
||||
this.runLog = options.runLog ?? { log() {}, async flush() {} };
|
||||
|
||||
|
|
@ -231,12 +249,36 @@ export class SessionManager {
|
|||
message: AgentMessage,
|
||||
options?: { internal?: boolean; displayContent?: UserMessage["content"]; source?: import("./types.js").MessageSource },
|
||||
) {
|
||||
// Pre-emptive truncation: save oversized tool results as artifacts
|
||||
// and persist a truncated version in the JSONL session file.
|
||||
let persistMessage = message;
|
||||
if (this.enableToolResultTruncation && message.role === "user") {
|
||||
const result = truncateOversizedToolResults({
|
||||
message,
|
||||
contextWindowTokens: this.contextWindowTokens,
|
||||
settings: this.toolResultTruncation,
|
||||
saveArtifact: (toolCallId, content) =>
|
||||
saveToolResultArtifact(this.sessionId, toolCallId, content, { baseDir: this.baseDir }),
|
||||
});
|
||||
if (result.truncated) {
|
||||
persistMessage = result.message;
|
||||
for (const art of result.artifacts) {
|
||||
this.runLog.log("tool_result_truncation", {
|
||||
tool_call_id: art.toolCallId,
|
||||
tool_name: art.toolName,
|
||||
original_chars: art.originalChars,
|
||||
artifact_path: art.artifactRelPath,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void this.enqueue(() =>
|
||||
appendEntry(
|
||||
this.sessionId,
|
||||
{
|
||||
type: "message",
|
||||
message,
|
||||
message: persistMessage,
|
||||
timestamp: Date.now(),
|
||||
...(options?.internal ? { internal: true } : {}),
|
||||
...(options?.displayContent !== undefined
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue