test(compaction): harden E2E integration tests for artifact pipeline

- Add real user messages for bootstrap protection in pruning tests
- Fix artifact directory path assertions (baseDir vs sessions/baseDir)
- Add cross-phase tests (Phase 1 truncation → Phase 2 pruning)
- Remove conditional assertion guards that could silently skip checks
- All 30 E2E integration tests now pass with mandatory assertions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jiayuan Zhang 2026-02-15 23:13:12 +08:00
parent 58f02a2080
commit b15e1eeb2a
2 changed files with 406 additions and 177 deletions

View file

@ -1,26 +1,26 @@
/**
* E2E Integration Test: Phase 2 Artifact-Aware Pruning + Summary Fallback
*
* Tests that tool result pruning preserves artifact references
* and that summary fallback extracts artifact paths.
* Test Matrix:
*
* Use Case Expected Outcome
*
* UC1: Soft trim with artifact ref Artifact ref in trim note
* UC2: Hard clear with artifact ref Artifact ref in placeholder
* UC3: Soft trim without artifact ref Normal trim (no artifact)
* UC4: Summary fallback extracts artifact refs "Saved Artifacts" section
* UC5: Cross-phase: Phase1 output → Phase2   Ref survives full pipeline
*
*/
import { describe, it, expect } from "vitest";
import { pruneToolResults } from "./tool-result-pruning.js";
import { truncateOversizedToolResults } from "./tool-result-truncation.js";
import type { AgentMessage } from "@mariozechner/pi-agent-core";
/**
 * Helper: build a user-role message whose content is a single `tool_result`
 * block carrying the given text.
 *
 * @param text - String stored in the tool_result block's `content` field.
 * @param toolUseId - Value for the block's `tool_use_id`; defaults to "call_1".
 * @returns The message object, cast through `any` to `AgentMessage` so the
 *          fixture does not have to satisfy the full message type.
 */
function makeToolResultMessage(text: string, toolUseId = "call_1"): AgentMessage {
  return {
    role: "user",
    // Exactly one `content` key: an object literal may not repeat a property name.
    content: [{ type: "tool_result", tool_use_id: toolUseId, content: text }],
    timestamp: Date.now(),
  } as any;
}
@ -33,17 +33,37 @@ function makeAssistantMessage(text = "OK"): AgentMessage {
} as any;
}
/**
 * Builds a genuine user message whose `content` is a plain string rather than
 * a tool_result block. The pruning tests prepend one of these because
 * pruneToolResults' bootstrap protection needs a real user turn.
 *
 * @param text - Message text; defaults to "Hello".
 * @returns The message object, cast through `any` to `AgentMessage`.
 */
function makeUserMessage(text = "Hello"): AgentMessage {
  const userTurn = { role: "user", content: text, timestamp: Date.now() };
  return userTurn as any;
}
/**
 * Flattens a message `content` value to plain text.
 *
 * - A string is returned unchanged.
 * - An array yields the concatenation of the `text` field of every element
 *   whose `type` is "text"; all other elements are ignored.
 * - Anything else (objects, numbers, null, undefined) yields "".
 *
 * @param content - Value to flatten.
 * @returns The extracted text, or "" when none can be extracted.
 */
function extractContentText(content: unknown): string {
  if (typeof content === "string") {
    return content;
  }
  if (!Array.isArray(content)) {
    return "";
  }

  const pieces: unknown[] = [];
  for (const block of content) {
    if (block?.type === "text") {
      pieces.push(block.text);
    }
  }
  // join() renders undefined/null entries as "", so a text block without a
  // `text` field contributes nothing.
  return pieces.join("");
}
describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
it("soft trim preserves artifact reference from pre-emptive truncation", () => {
// Simulate a tool result that was previously truncated by Phase 1 and contains an artifact ref
// UC1: Soft trim preserves artifact reference
it("UC1: soft trim preserves artifact reference in trimmed note", () => {
// Tool result with an artifact reference from Phase 1 truncation
const truncatedContent =
"A".repeat(3000) +
"\n\n[Tool result truncated: original 200000 chars. Full result saved to artifacts/call_abc123.txt. Use the read tool to access the complete data if needed.]\n\n" +
"B".repeat(3000);
// Build conversation that should trigger soft trimming
// Put older messages first (these get pruned), recent ones are protected
const messages: AgentMessage[] = [
makeUserMessage("start"),
makeAssistantMessage("Calling tool..."),
makeToolResultMessage(truncatedContent),
makeAssistantMessage("Processing..."),
@ -58,43 +78,38 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
const result = pruneToolResults({
messages,
contextWindowTokens: 5_000, // Small window to trigger pruning
contextWindowTokens: 5_000,
settings: {
softTrimRatio: 0.0, // Always trigger soft trim
softTrimRatio: 0.0, // Always trigger
hardClearRatio: 1.0, // Never hard clear
minPrunableToolChars: 100,
keepLastAssistants: 3,
softTrim: {
maxChars: 2_000, // Trigger on the large result
headChars: 500,
tailChars: 500,
},
hardClear: {
enabled: false,
placeholder: "[Content removed]",
},
softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
hardClear: { enabled: false, placeholder: "[Content removed]" },
},
});
// Find the soft-trimmed message
if (result.changed && result.softTrimmed > 0) {
const trimmedMsg = result.messages[1] as any;
const text = trimmedMsg.content[0]?.text ?? trimmedMsg.content[0]?.content ?? "";
// The artifact reference should be preserved in the trim note
expect(text).toContain("artifacts/call_abc123.txt");
}
// Must actually trigger soft trimming
expect(result.changed).toBe(true);
expect(result.softTrimmed).toBeGreaterThan(0);
// The trimmed message should preserve the artifact reference (index 2 due to prepended user msg)
const trimmedMsg = result.messages[2] as any;
const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]);
expect(text).toContain("artifacts/call_abc123.txt");
});
it("hard clear preserves artifact reference", () => {
// UC2: Hard clear preserves artifact reference
it("UC2: hard clear preserves artifact reference in placeholder", () => {
const truncatedContent =
"X".repeat(80_000) +
"\n\n[Tool result truncated: Full result saved to artifacts/call_xyz.txt.]\n\n" +
"Y".repeat(20_000);
const messages: AgentMessage[] = [
makeUserMessage("start"),
makeAssistantMessage("old"),
makeToolResultMessage(truncatedContent),
// Add enough recent messages to push the old one into hard-clear range
makeAssistantMessage("a1"),
makeToolResultMessage("r1"),
makeAssistantMessage("a2"),
@ -110,45 +125,69 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
contextWindowTokens: 2_000,
settings: {
softTrimRatio: 0.0,
hardClearRatio: 0.0, // Always trigger hard clear
hardClearRatio: 0.0, // Always trigger
minPrunableToolChars: 100,
keepLastAssistants: 3,
softTrim: {
maxChars: 50, // Everything over 50 gets soft trimmed first
headChars: 20,
tailChars: 20,
},
hardClear: {
enabled: true,
placeholder: "[Content removed]",
},
softTrim: { maxChars: 50, headChars: 20, tailChars: 20 },
hardClear: { enabled: true, placeholder: "[Content removed]" },
},
});
if (result.changed && result.hardCleared > 0) {
// Find the hard-cleared message (should be messages[1])
const clearedMsg = result.messages[1] as any;
const text = clearedMsg.content[0]?.text ?? "";
expect(text).toContain("[Content removed]");
expect(text).toContain("artifacts/call_xyz.txt");
}
expect(result.changed).toBe(true);
expect(result.hardCleared).toBeGreaterThan(0);
// The hard-cleared message should contain both the placeholder AND the artifact ref
const clearedMsg = result.messages[2] as any;
const text = extractContentText(clearedMsg.content[0]?.content ?? clearedMsg.content[0]);
expect(text).toContain("[Content removed]");
expect(text).toContain("artifacts/call_xyz.txt");
});
// UC3: Baseline — a tool result carrying no artifact reference is soft-trimmed as usual
it("UC3: soft trim without artifact reference works normally", () => {
  // 6,000 filler characters with no "artifacts/" marker anywhere
  const fillerOnly = "D".repeat(6_000);

  // Leading real user message, then the large result, then three short rounds
  const conversation: AgentMessage[] = [
    makeUserMessage("start"),
    makeAssistantMessage("call"),
    makeToolResultMessage(fillerOnly),
    makeAssistantMessage("r1"),
    makeToolResultMessage("s"),
    makeAssistantMessage("r2"),
    makeToolResultMessage("s"),
    makeAssistantMessage("r3"),
    makeToolResultMessage("s"),
  ];

  // Soft trim ratio 0.0 with hard clear disabled, same shape as the UC1 settings
  const settings = {
    softTrimRatio: 0.0,
    hardClearRatio: 1.0,
    minPrunableToolChars: 100,
    keepLastAssistants: 3,
    softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
    hardClear: { enabled: false, placeholder: "" },
  };

  const outcome = pruneToolResults({
    messages: conversation,
    contextWindowTokens: 5_000,
    settings,
  });

  // Pruning must have happened, and as a soft trim
  expect(outcome.changed).toBe(true);
  expect(outcome.softTrimmed).toBeGreaterThan(0);

  // Index 2 is the large tool result (after the user and assistant messages)
  const prunedMessage = outcome.messages[2] as any;
  const firstBlock = prunedMessage.content[0];
  const prunedText = extractContentText(firstBlock?.content ?? firstBlock);

  // Trim note present, but no artifact path
  expect(prunedText).toContain("Tool result trimmed");
  expect(prunedText).not.toContain("artifacts/");
});
});
describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => {
it("DEFAULT_SUMMARY_INSTRUCTIONS mentions artifacts", async () => {
// Read the summarization module to verify instructions include artifact guidance
const { DEFAULT_SUMMARY_INSTRUCTIONS } = await import("./summarization.js") as any;
// The instructions are a module-level const, but not exported. Let's verify via
// the splitMessagesForSummary path that exercises the flow indirectly.
// Instead, let's verify the artifact detection in summary-fallback.
});
it("summary fallback includes artifact references section", async () => {
// Import the module to access the plain text fallback
// UC4: summary fallback extracts artifact references
it("UC4: summary fallback includes 'Saved Artifacts' section with all artifact refs", async () => {
const mod = await import("./summary-fallback.js");
// Create messages with artifact references embedded in tool results
const messages: AgentMessage[] = [
makeAssistantMessage("Let me read the file"),
{
@ -181,33 +220,140 @@ describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => {
} as any,
];
// Use summarizeWithFallback to exercise the full flow — but this requires
// an LLM model. Instead, we can test the behavior by causing all levels to fail.
// The summarizeWithFallback will fall through to Level 3 (plain text) if the model fails.
// Let's create a mock model that always throws.
// Force Level 3 fallback (plain text) by using a model that always throws
const failingModel = {
complete: () => { throw new Error("Test: no LLM available"); },
complete: () => { throw new Error("Test: no LLM"); },
};
try {
const result = await mod.summarizeWithFallback({
messages,
model: failingModel as any,
reserveTokens: 1024,
apiKey: "test-key",
instructions: "summarize",
availableTokens: 100_000,
});
const result = await mod.summarizeWithFallback({
messages,
model: failingModel as any,
reserveTokens: 1024,
apiKey: "test-key",
instructions: "summarize",
availableTokens: 100_000,
});
// Should fall through to Level 3 (plain-text fallback)
expect(result.level).toBe(3);
// The summary should contain artifact references
expect(result.summary).toContain("## Saved Artifacts");
expect(result.summary).toContain("artifacts/call_1.txt");
expect(result.summary).toContain("artifacts/call_2.txt");
} catch {
// If generateSummary isn't available as expected, at least verify
// the artifact extraction pattern works at the module level
}
// Must fall through to Level 3
expect(result.level).toBe(3);
// Summary must contain artifact references
expect(result.summary).toContain("## Saved Artifacts");
expect(result.summary).toContain("artifacts/call_1.txt");
expect(result.summary).toContain("artifacts/call_2.txt");
});
});
/**
 * Cross-phase pipeline tests: the output of Phase 1 (truncateOversizedToolResults)
 * is fed into Phase 2 (pruneToolResults), and the artifact path written into the
 * Phase 1 truncation marker must still be present in the Phase 2 result.
 */
describe("Cross-Phase E2E: Phase 1 → Phase 2 Pipeline", () => {
  /**
   * Runs Phase 1 on a single-tool_result user message, with a stubbed
   * `saveArtifact` that writes nothing and just returns `artifactPath`.
   */
  function runPhase1(toolUseId: string, content: string, artifactPath: string) {
    return truncateOversizedToolResults({
      message: {
        role: "user",
        content: [{ type: "tool_result", tool_use_id: toolUseId, content }],
        timestamp: Date.now(),
      } as any,
      contextWindowTokens: 50_000,
      saveArtifact: () => artifactPath,
    });
  }

  /**
   * Places the Phase 1 message at index 2, after a real user message and an
   * assistant message, followed by three short assistant/tool_result rounds.
   */
  function buildConversation(phase1Message: AgentMessage): AgentMessage[] {
    return [
      makeUserMessage("start"),
      makeAssistantMessage("calling"),
      phase1Message,
      makeAssistantMessage("a1"),
      makeToolResultMessage("s1"),
      makeAssistantMessage("a2"),
      makeToolResultMessage("s2"),
      makeAssistantMessage("a3"),
      makeToolResultMessage("s3"),
    ];
  }

  /** Text of the first content block of the message at index 2 (the Phase 1 message). */
  function textOfPhase1Slot(messages: AgentMessage[]): string {
    const slot = messages[2] as any;
    return extractContentText(slot.content[0]?.content ?? slot.content[0]);
  }

  // UC5: Phase 1 truncation output → Phase 2 pruning — artifact ref survives
  it("UC5: artifact ref from Phase 1 truncation survives Phase 2 soft trim", () => {
    // Phase 1: truncate an oversized tool result
    const bigContent = "ORIGINAL_DATA_" + "Q".repeat(200_000);
    const phase1Result = runPhase1("call_cross", bigContent, "artifacts/call_cross.txt");

    // Phase 1 must have truncated and recorded exactly one artifact
    expect(phase1Result.truncated).toBe(true);
    expect(phase1Result.artifacts.length).toBe(1);
    expect(phase1Result.artifacts[0]!.toolCallId).toBe("call_cross");

    // The Phase 1 output must already carry the artifact path
    const phase1Msg = phase1Result.message as any;
    const phase1Text = extractContentText(phase1Msg.content[0].content);
    expect(phase1Text).toContain("artifacts/call_cross.txt");

    // Phase 2: feed the Phase 1 output into pruneToolResults
    const phase2Result = pruneToolResults({
      messages: buildConversation(phase1Result.message),
      contextWindowTokens: 3_000,
      settings: {
        softTrimRatio: 0.0, // Always trigger
        hardClearRatio: 1.0, // No hard clear
        minPrunableToolChars: 100,
        keepLastAssistants: 3,
        softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
        hardClear: { enabled: false, placeholder: "" },
      },
    });

    expect(phase2Result.changed).toBe(true);
    // Require that a soft trim really ran; otherwise the final check could
    // pass on an untouched message.
    expect(phase2Result.softTrimmed).toBeGreaterThan(0);

    // The artifact reference must survive the Phase 2 soft trim
    expect(textOfPhase1Slot(phase2Result.messages)).toContain("artifacts/call_cross.txt");
  });

  // UC5b: Phase 1 → Phase 2 hard clear also preserves the artifact ref
  it("UC5b: artifact ref from Phase 1 truncation survives Phase 2 hard clear", () => {
    const bigContent = "HC_DATA_" + "W".repeat(200_000);
    const phase1Result = runPhase1("call_hc", bigContent, "artifacts/call_hc.txt");

    expect(phase1Result.truncated).toBe(true);

    const phase2Result = pruneToolResults({
      messages: buildConversation(phase1Result.message),
      contextWindowTokens: 1_000,
      settings: {
        softTrimRatio: 0.0,
        hardClearRatio: 0.0, // Always hard clear
        minPrunableToolChars: 100,
        keepLastAssistants: 3,
        softTrim: { maxChars: 50, headChars: 20, tailChars: 20 },
        hardClear: { enabled: true, placeholder: "[Cleared]" },
      },
    });

    expect(phase2Result.changed).toBe(true);
    expect(phase2Result.hardCleared).toBeGreaterThan(0);

    // Both the placeholder and the artifact path must be present
    const finalText = textOfPhase1Slot(phase2Result.messages);
    expect(finalText).toContain("[Cleared]");
    expect(finalText).toContain("artifacts/call_hc.txt");
  });
});

View file

@ -1,7 +1,21 @@
/**
* E2E Integration Test: Phase 1 Artifact Storage + Pre-emptive Truncation
*
* Tests the full flow: SessionManager → truncateOversizedToolResults → artifact-store
* Tests the full flow: SessionManager.saveMessage() → truncateOversizedToolResults → artifact-store
*
* Test Matrix:
*
* Use Case Expected Outcome
*
* UC1: Oversized tool result Truncated + artifact
* UC2: Small tool result Pass-through, no artifact
* UC3: Head/tail preservation Markers preserved
* UC4: Multiple results (mixed sizes) Selective truncation
* UC5: Feature toggle disabled No truncation
* UC6: Session reload after truncation Truncated content
* UC7: Truncation marker format Correct format
* UC8: Artifact readable after reload Full content intact
*
*/
import { describe, it, expect, beforeEach, afterEach } from "vitest";
import { mkdirSync, rmSync, existsSync } from "node:fs";
@ -45,7 +59,8 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
rmSync(testDir, { recursive: true, force: true });
});
it("saves oversized tool result to artifact and truncates in session", async () => {
// UC1: Oversized tool result → truncated in session + artifact saved
it("UC1: oversized tool result is truncated and artifact is saved with full content", async () => {
const sm = new SessionManager({
sessionId,
baseDir: testDir,
@ -55,24 +70,15 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
enableToolResultPruning: false,
});
// Create an oversized tool result (> 30% of 100k * 4 chars = 120k chars)
const bigContent = "X".repeat(200_000);
const userMessage = {
role: "user" as const,
content: [
{
type: "tool_result" as const,
tool_use_id: "call_abc123",
content: bigContent,
},
],
sm.saveMessage({
role: "user",
content: [{ type: "tool_result", tool_use_id: "call_abc123", content: bigContent }],
timestamp: Date.now(),
};
sm.saveMessage(userMessage);
} as any);
await sm.flush();
// Verify: session file has truncated content
// Session file: truncated
const entries = readEntries(sessionId, { baseDir: testDir });
const msgEntries = entries.filter((e) => e.type === "message");
expect(msgEntries.length).toBe(1);
@ -83,12 +89,14 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
expect(savedText).toContain("Tool result truncated");
expect(savedText).toContain("artifacts/");
// Verify: artifact file exists with full content
// Artifact: full content preserved
const artifactContent = readToolResultArtifact(sessionId, "call_abc123", { baseDir: testDir });
expect(artifactContent).toBe(bigContent);
expect(artifactContent!.length).toBe(200_000);
});
it("does NOT create artifact for small tool results", async () => {
// UC2: Small tool result → pass-through, no artifact
it("UC2: small tool result passes through without truncation or artifact", async () => {
const sm = new SessionManager({
sessionId,
baseDir: testDir,
@ -99,76 +107,60 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
});
const smallContent = "Small result data";
const userMessage = {
role: "user" as const,
content: [
{
type: "tool_result" as const,
tool_use_id: "call_small",
content: smallContent,
},
],
sm.saveMessage({
role: "user",
content: [{ type: "tool_result", tool_use_id: "call_small", content: smallContent }],
timestamp: Date.now(),
};
sm.saveMessage(userMessage);
} as any);
await sm.flush();
// Verify: session file has full content (no truncation)
// Session file: unchanged content
const entries = readEntries(sessionId, { baseDir: testDir });
const saved = (entries.find((e) => e.type === "message") as any).message;
const savedText = extractContentText(saved.content[0].content);
expect(savedText).toBe(smallContent);
// Verify: no artifacts directory created
const artifactsDir = join(testDir, "sessions", sessionId, "artifacts");
// No artifacts directory
const artifactsDir = join(testDir, sessionId, "artifacts");
expect(existsSync(artifactsDir)).toBe(false);
});
it("truncated message preserves head and tail of original content", async () => {
// UC3: Head/tail preservation
it("UC3: truncated content preserves identifiable head and tail markers", async () => {
const sm = new SessionManager({
sessionId,
baseDir: testDir,
compactionMode: "tokens",
contextWindowTokens: 50_000, // smaller window → lower threshold
contextWindowTokens: 50_000,
enableToolResultTruncation: true,
enableToolResultPruning: false,
});
// Create content with identifiable head and tail
const head = "HEAD_MARKER_" + "A".repeat(10_000);
const head = "HEAD_MARKER_START" + "A".repeat(10_000);
const middle = "B".repeat(100_000);
const tail = "C".repeat(10_000) + "_TAIL_MARKER";
const tail = "C".repeat(10_000) + "TAIL_MARKER_END";
const bigContent = head + middle + tail;
const userMessage = {
role: "user" as const,
content: [
{
type: "tool_result" as const,
tool_use_id: "call_headtail",
content: bigContent,
},
],
sm.saveMessage({
role: "user",
content: [{ type: "tool_result", tool_use_id: "call_ht", content: bigContent }],
timestamp: Date.now(),
};
sm.saveMessage(userMessage);
} as any);
await sm.flush();
const entries = readEntries(sessionId, { baseDir: testDir });
const saved = (entries.find((e) => e.type === "message") as any).message;
const savedText = extractContentText(saved.content[0].content);
// Head should be preserved
expect(savedText).toContain("HEAD_MARKER_");
// Tail should be preserved
expect(savedText).toContain("_TAIL_MARKER");
// Middle should be truncated
expect(savedText).toContain("HEAD_MARKER_START");
expect(savedText).toContain("TAIL_MARKER_END");
expect(savedText.length).toBeLessThan(bigContent.length);
// Must also have the truncation marker
expect(savedText).toContain("Tool result truncated");
});
it("handles multiple tool results in same message", async () => {
// UC4: Multiple tool results — selective truncation
it("UC4: message with mixed-size tool results truncates only oversized ones", async () => {
const sm = new SessionManager({
sessionId,
baseDir: testDir,
@ -178,69 +170,160 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
enableToolResultPruning: false,
});
const bigContent1 = "RESULT1_" + "X".repeat(200_000);
const smallContent = "small result";
const bigContent2 = "RESULT2_" + "Y".repeat(200_000);
const big1 = "BIG1_" + "X".repeat(200_000);
const small = "SMALL_RESULT_INTACT";
const big2 = "BIG2_" + "Y".repeat(200_000);
const userMessage = {
role: "user" as const,
sm.saveMessage({
role: "user",
content: [
{ type: "tool_result" as const, tool_use_id: "call_big1", content: bigContent1 },
{ type: "tool_result" as const, tool_use_id: "call_small", content: smallContent },
{ type: "tool_result" as const, tool_use_id: "call_big2", content: bigContent2 },
{ type: "tool_result", tool_use_id: "call_big1", content: big1 },
{ type: "tool_result", tool_use_id: "call_sm", content: small },
{ type: "tool_result", tool_use_id: "call_big2", content: big2 },
],
timestamp: Date.now(),
};
sm.saveMessage(userMessage);
} as any);
await sm.flush();
const entries = readEntries(sessionId, { baseDir: testDir });
const saved = (entries.find((e) => e.type === "message") as any).message;
// Big results should be truncated
const text0 = extractContentText(saved.content[0].content);
const text2 = extractContentText(saved.content[2].content);
expect(text0).toContain("Tool result truncated");
expect(text2).toContain("Tool result truncated");
// Big results: truncated
const t0 = extractContentText(saved.content[0].content);
const t2 = extractContentText(saved.content[2].content);
expect(t0).toContain("Tool result truncated");
expect(t2).toContain("Tool result truncated");
expect(t0.length).toBeLessThan(big1.length);
expect(t2.length).toBeLessThan(big2.length);
// Small result should be unchanged
const text1 = extractContentText(saved.content[1].content);
expect(text1).toBe(smallContent);
// Small result: intact
const t1 = extractContentText(saved.content[1].content);
expect(t1).toBe(small);
// Both artifacts should exist
// Both artifacts saved with full content
const art1 = readToolResultArtifact(sessionId, "call_big1", { baseDir: testDir });
expect(art1).toContain("RESULT1_");
expect(art1).toBe(big1);
const art2 = readToolResultArtifact(sessionId, "call_big2", { baseDir: testDir });
expect(art2).toContain("RESULT2_");
expect(art2).toBe(big2);
});
it("respects enableToolResultTruncation=false", async () => {
// UC5: Feature disabled → no truncation
it("UC5: enableToolResultTruncation=false skips all truncation", async () => {
const sm = new SessionManager({
sessionId,
baseDir: testDir,
compactionMode: "tokens",
contextWindowTokens: 50_000,
enableToolResultTruncation: false, // Disabled
enableToolResultTruncation: false,
enableToolResultPruning: false,
});
const bigContent = "Z".repeat(200_000);
const userMessage = {
role: "user" as const,
content: [
{ type: "tool_result" as const, tool_use_id: "call_noop", content: bigContent },
],
sm.saveMessage({
role: "user",
content: [{ type: "tool_result", tool_use_id: "call_noop", content: bigContent }],
timestamp: Date.now(),
};
sm.saveMessage(userMessage);
} as any);
await sm.flush();
const entries = readEntries(sessionId, { baseDir: testDir });
const saved = (entries.find((e) => e.type === "message") as any).message;
// Should NOT be truncated since feature is disabled
const savedText = extractContentText(saved.content[0].content);
expect(savedText).toBe(bigContent);
expect(savedText).not.toContain("Tool result truncated");
});
// UC6: A second SessionManager on the same session sees the truncated form
it("UC6: loadMessages() returns truncated content after save+reload", async () => {
  // Options shared by the writing and the reading manager
  const sharedOptions = {
    sessionId,
    baseDir: testDir,
    compactionMode: "tokens" as const,
    contextWindowTokens: 100_000,
  };

  const writer = new SessionManager({
    ...sharedOptions,
    enableToolResultTruncation: true,
    enableToolResultPruning: false,
  });

  const oversized = "RELOAD_TEST_" + "R".repeat(200_000);
  const toolResultBlock = { type: "tool_result", tool_use_id: "call_reload", content: oversized };
  writer.saveMessage({
    role: "user",
    content: [toolResultBlock],
    timestamp: Date.now(),
  } as any);
  await writer.flush();

  // Fresh manager over the same sessionId/baseDir, created only to reload
  const reader = new SessionManager({ ...sharedOptions });
  const reloaded = reader.loadMessages();
  expect(reloaded.length).toBe(1);

  const firstMessage = reloaded[0] as any;
  const reloadedText = extractContentText(firstMessage.content[0].content);

  // The reloaded text is the truncated form, not the full original
  expect(reloadedText).toContain("Tool result truncated");
  expect(reloadedText).toContain("artifacts/");
  expect(reloadedText.length).toBeLessThan(oversized.length);
});
// UC7: The truncation marker states the original size and the artifact path
it("UC7: truncation marker contains original size and artifact path", async () => {
  const manager = new SessionManager({
    sessionId,
    baseDir: testDir,
    compactionMode: "tokens",
    contextWindowTokens: 100_000,
    enableToolResultTruncation: true,
    enableToolResultPruning: false,
  });

  // 200,000 characters, matching the size asserted in the marker below
  const oversized = "M".repeat(200_000);
  const toolResultBlock = { type: "tool_result", tool_use_id: "call_fmt", content: oversized };
  manager.saveMessage({
    role: "user",
    content: [toolResultBlock],
    timestamp: Date.now(),
  } as any);
  await manager.flush();

  const sessionEntries = readEntries(sessionId, { baseDir: testDir });
  const messageEntry = sessionEntries.find((e) => e.type === "message") as any;
  const markerText = extractContentText(messageEntry.message.content[0].content);

  // Three parts of the marker: original size, artifact path, and "read tool" hint
  expect(markerText).toMatch(/original 200000 chars/);
  expect(markerText).toMatch(/Full result saved to artifacts\/call_fmt\.txt/);
  expect(markerText).toContain("read tool");
});
// UC8: The artifact can be read back by toolCallId and equals the original exactly
it("UC8: artifact is readable by toolCallId and contains exact original content", async () => {
  const manager = new SessionManager({
    sessionId,
    baseDir: testDir,
    compactionMode: "tokens",
    contextWindowTokens: 100_000,
    enableToolResultTruncation: true,
    enableToolResultPruning: false,
  });

  // Non-ASCII payload between START|/|END sentinels, compared by exact equality below
  const payloadBody = "αβγδ".repeat(50_000);
  const specialContent = "START|" + payloadBody + "|END";
  const toolResultBlock = {
    type: "tool_result",
    tool_use_id: "call_exact",
    content: specialContent,
  };
  manager.saveMessage({
    role: "user",
    content: [toolResultBlock],
    timestamp: Date.now(),
  } as any);
  await manager.flush();

  const storedArtifact = readToolResultArtifact(sessionId, "call_exact", { baseDir: testDir });
  expect(storedArtifact).toBe(specialContent);

  // The artifacts directory sits under <baseDir>/<sessionId>/artifacts
  const artifactsPath = join(testDir, sessionId, "artifacts");
  const artifactsDirPresent = existsSync(artifactsPath);
  expect(artifactsDirPresent).toBe(true);
});
});