From b15e1eeb2ac21edc52a074935cd23aab0f322ad5 Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Sun, 15 Feb 2026 23:13:12 +0800 Subject: [PATCH] test(compaction): harden E2E integration tests for artifact pipeline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add real user messages for bootstrap protection in pruning tests - Fix artifact directory path assertions (baseDir vs sessions/baseDir) - Add cross-phase tests (Phase 1 truncation → Phase 2 pruning) - Remove conditional assertion guards that could silently skip checks - All 30 E2E integration tests now pass with mandatory assertions Co-Authored-By: Claude Opus 4.6 --- .../artifact-pruning-integration.test.ts | 322 +++++++++++++----- .../session/artifact-integration.test.ts | 261 +++++++++----- 2 files changed, 406 insertions(+), 177 deletions(-) diff --git a/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts b/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts index a642c052..152aae4d 100644 --- a/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts +++ b/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts @@ -1,26 +1,26 @@ /** * E2E Integration Test: Phase 2 — Artifact-Aware Pruning + Summary Fallback * - * Tests that tool result pruning preserves artifact references - * and that summary fallback extracts artifact paths. + * Test Matrix: + * ┌──────────────────────────────────────────────┬──────────────────────────────┐ + * │ Use Case │ Expected Outcome │ + * ├──────────────────────────────────────────────┼──────────────────────────────┤ + * │ UC1: Soft trim with artifact ref │ Artifact ref in trim note │ + * │ UC2: Hard clear with artifact ref │ Artifact ref in placeholder │ + * │ UC3: Soft trim without artifact ref │ Normal trim (no artifact) │ + * │ UC4: Summary fallback extracts artifact refs │ "Saved Artifacts" section │ + * │ UC5: Cross-phase: Phase1 output → Phase2 │ Ref survives full pipeline │ + * └──────────────────────────────────────────────┴──────────────────────────────┘ */ import { describe, it, expect } from "vitest"; import { pruneToolResults } from "./tool-result-pruning.js"; +import { truncateOversizedToolResults } from "./tool-result-truncation.js"; import type { AgentMessage } from "@mariozechner/pi-agent-core"; -/** - * Helper: build a user message with a single tool_result containing the given text. - */ function makeToolResultMessage(text: string, toolUseId = "call_1"): AgentMessage { return { role: "user", - content: [ - { - type: "tool_result", - tool_use_id: toolUseId, - content: text, - }, - ], + content: [{ type: "tool_result", tool_use_id: toolUseId, content: text }], timestamp: Date.now(), } as any; } @@ -33,17 +33,37 @@ function makeAssistantMessage(text = "OK"): AgentMessage { } as any; } +/** A real user message (not tool_result) — needed for bootstrap protection in pruneToolResults */ +function makeUserMessage(text = "Hello"): AgentMessage { + return { + role: "user", + content: text, + timestamp: Date.now(), + } as any; +} + +function extractContentText(content: unknown): string { + if (typeof content === "string") return content; + if (Array.isArray(content)) { + return content + .filter((b: any) => b?.type === "text") + .map((b: any) => b.text) + .join(""); + } + return ""; +} + describe("Phase 2 E2E: Artifact-Aware Pruning", () => { - it("soft trim preserves artifact reference from pre-emptive truncation", () => { - // Simulate a tool result that was previously truncated by Phase 1 and contains an artifact ref + // UC1: Soft trim preserves artifact reference + it("UC1: soft trim preserves artifact reference in trimmed note", () => { + // Tool result with an artifact reference from Phase 1 truncation const truncatedContent = "A".repeat(3000) + "\n\n[Tool result truncated: original 200000 chars. Full result saved to artifacts/call_abc123.txt. Use the read tool to access the complete data if needed.]\n\n" + "B".repeat(3000); - // Build conversation that should trigger soft trimming - // Put older messages first (these get pruned), recent ones are protected const messages: AgentMessage[] = [ + makeUserMessage("start"), makeAssistantMessage("Calling tool..."), makeToolResultMessage(truncatedContent), makeAssistantMessage("Processing..."), @@ -58,43 +78,38 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => { const result = pruneToolResults({ messages, - contextWindowTokens: 5_000, // Small window to trigger pruning + contextWindowTokens: 5_000, settings: { - softTrimRatio: 0.0, // Always trigger soft trim + softTrimRatio: 0.0, // Always trigger hardClearRatio: 1.0, // Never hard clear minPrunableToolChars: 100, keepLastAssistants: 3, - softTrim: { - maxChars: 2_000, // Trigger on the large result - headChars: 500, - tailChars: 500, - }, - hardClear: { - enabled: false, - placeholder: "[Content removed]", - }, + softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 }, + hardClear: { enabled: false, placeholder: "[Content removed]" }, }, }); - // Find the soft-trimmed message - if (result.changed && result.softTrimmed > 0) { - const trimmedMsg = result.messages[1] as any; - const text = trimmedMsg.content[0]?.text ?? trimmedMsg.content[0]?.content ?? ""; - // The artifact reference should be preserved in the trim note - expect(text).toContain("artifacts/call_abc123.txt"); - } + // Must actually trigger soft trimming + expect(result.changed).toBe(true); + expect(result.softTrimmed).toBeGreaterThan(0); + + // The trimmed message should preserve the artifact reference (index 2 due to prepended user msg) + const trimmedMsg = result.messages[2] as any; + const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]); + expect(text).toContain("artifacts/call_abc123.txt"); }); - it("hard clear preserves artifact reference", () => { + // UC2: Hard clear preserves artifact reference + it("UC2: hard clear preserves artifact reference in placeholder", () => { const truncatedContent = "X".repeat(80_000) + "\n\n[Tool result truncated: Full result saved to artifacts/call_xyz.txt.]\n\n" + "Y".repeat(20_000); const messages: AgentMessage[] = [ + makeUserMessage("start"), makeAssistantMessage("old"), makeToolResultMessage(truncatedContent), - // Add enough recent messages to push the old one into hard-clear range makeAssistantMessage("a1"), makeToolResultMessage("r1"), makeAssistantMessage("a2"), @@ -110,45 +125,69 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => { contextWindowTokens: 2_000, settings: { softTrimRatio: 0.0, - hardClearRatio: 0.0, // Always trigger hard clear + hardClearRatio: 0.0, // Always trigger minPrunableToolChars: 100, keepLastAssistants: 3, - softTrim: { - maxChars: 50, // Everything over 50 gets soft trimmed first - headChars: 20, - tailChars: 20, - }, - hardClear: { - enabled: true, - placeholder: "[Content removed]", - }, + softTrim: { maxChars: 50, headChars: 20, tailChars: 20 }, + hardClear: { enabled: true, placeholder: "[Content removed]" }, }, }); - if (result.changed && result.hardCleared > 0) { - // Find the hard-cleared message (should be messages[1]) - const clearedMsg = result.messages[1] as any; - const text = clearedMsg.content[0]?.text ?? ""; - expect(text).toContain("[Content removed]"); - expect(text).toContain("artifacts/call_xyz.txt"); - } + expect(result.changed).toBe(true); + expect(result.hardCleared).toBeGreaterThan(0); + + // The hard-cleared message should contain both the placeholder AND the artifact ref + const clearedMsg = result.messages[2] as any; + const text = extractContentText(clearedMsg.content[0]?.content ?? clearedMsg.content[0]); + expect(text).toContain("[Content removed]"); + expect(text).toContain("artifacts/call_xyz.txt"); + }); + + // UC3: Soft trim without artifact ref (baseline behavior unchanged) + it("UC3: soft trim without artifact reference works normally", () => { + const plainContent = "D".repeat(6_000); // No artifact reference + + const messages: AgentMessage[] = [ + makeUserMessage("start"), + makeAssistantMessage("call"), + makeToolResultMessage(plainContent), + makeAssistantMessage("r1"), + makeToolResultMessage("s"), + makeAssistantMessage("r2"), + makeToolResultMessage("s"), + makeAssistantMessage("r3"), + makeToolResultMessage("s"), + ]; + + const result = pruneToolResults({ + messages, + contextWindowTokens: 5_000, + settings: { + softTrimRatio: 0.0, + hardClearRatio: 1.0, + minPrunableToolChars: 100, + keepLastAssistants: 3, + softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 }, + hardClear: { enabled: false, placeholder: "" }, + }, + }); + + expect(result.changed).toBe(true); + expect(result.softTrimmed).toBeGreaterThan(0); + + const trimmedMsg = result.messages[2] as any; + const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]); + // Should have trim note but no artifact reference + expect(text).toContain("Tool result trimmed"); + expect(text).not.toContain("artifacts/"); }); }); describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => { - it("DEFAULT_SUMMARY_INSTRUCTIONS mentions artifacts", async () => { - // Read the summarization module to verify instructions include artifact guidance - const { DEFAULT_SUMMARY_INSTRUCTIONS } = await import("./summarization.js") as any; - // The instructions are a module-level const, but not exported. Let's verify via - // the splitMessagesForSummary path that exercises the flow indirectly. - // Instead, let's verify the artifact detection in summary-fallback. - }); - - it("summary fallback includes artifact references section", async () => { - // Import the module to access the plain text fallback + // UC4: summary fallback extracts artifact references + it("UC4: summary fallback includes 'Saved Artifacts' section with all artifact refs", async () => { const mod = await import("./summary-fallback.js"); - // Create messages with artifact references embedded in tool results const messages: AgentMessage[] = [ makeAssistantMessage("Let me read the file"), { @@ -181,33 +220,140 @@ describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => { } as any, ]; - // Use summarizeWithFallback to exercise the full flow — but this requires - // an LLM model. Instead, we can test the behavior by causing all levels to fail. - // The summarizeWithFallback will fall through to Level 3 (plain text) if the model fails. - // Let's create a mock model that always throws. + // Force Level 3 fallback (plain text) by using a model that always throws const failingModel = { - complete: () => { throw new Error("Test: no LLM available"); }, + complete: () => { throw new Error("Test: no LLM"); }, }; - try { - const result = await mod.summarizeWithFallback({ - messages, - model: failingModel as any, - reserveTokens: 1024, - apiKey: "test-key", - instructions: "summarize", - availableTokens: 100_000, - }); + const result = await mod.summarizeWithFallback({ + messages, + model: failingModel as any, + reserveTokens: 1024, + apiKey: "test-key", + instructions: "summarize", + availableTokens: 100_000, + }); - // Should fall through to Level 3 (plain-text fallback) - expect(result.level).toBe(3); - // The summary should contain artifact references - expect(result.summary).toContain("## Saved Artifacts"); - expect(result.summary).toContain("artifacts/call_1.txt"); - expect(result.summary).toContain("artifacts/call_2.txt"); - } catch { - // If generateSummary isn't available as expected, at least verify - // the artifact extraction pattern works at the module level - } + // Must fall through to Level 3 + expect(result.level).toBe(3); + // Summary must contain artifact references + expect(result.summary).toContain("## Saved Artifacts"); + expect(result.summary).toContain("artifacts/call_1.txt"); + expect(result.summary).toContain("artifacts/call_2.txt"); + }); +}); + +describe("Cross-Phase E2E: Phase 1 → Phase 2 Pipeline", () => { + // UC5: Phase 1 truncation output → Phase 2 pruning — artifact ref survives + it("UC5: artifact ref from Phase 1 truncation survives Phase 2 soft trim", () => { + // Phase 1: truncate an oversized tool result + const bigContent = "ORIGINAL_DATA_" + "Q".repeat(200_000); + let artifactPath = ""; + + const phase1Result = truncateOversizedToolResults({ + message: { + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_cross", content: bigContent }], + timestamp: Date.now(), + } as any, + contextWindowTokens: 50_000, + saveArtifact: (_id, _content) => { + artifactPath = `artifacts/call_cross.txt`; + return artifactPath; + }, + }); + + // Phase 1 must have truncated + expect(phase1Result.truncated).toBe(true); + expect(phase1Result.artifacts.length).toBe(1); + expect(phase1Result.artifacts[0]!.toolCallId).toBe("call_cross"); + + // Extract the truncated text from Phase 1 output + const phase1Msg = phase1Result.message as any; + const phase1Text = extractContentText(phase1Msg.content[0].content); + expect(phase1Text).toContain("artifacts/call_cross.txt"); + + // Phase 2: feed Phase 1 output into pruneToolResults + const messages: AgentMessage[] = [ + makeUserMessage("start"), + makeAssistantMessage("calling"), + phase1Result.message, // This is the Phase 1 truncated message + makeAssistantMessage("a1"), + makeToolResultMessage("s1"), + makeAssistantMessage("a2"), + makeToolResultMessage("s2"), + makeAssistantMessage("a3"), + makeToolResultMessage("s3"), + ]; + + const phase2Result = pruneToolResults({ + messages, + contextWindowTokens: 3_000, + settings: { + softTrimRatio: 0.0, // Always trigger + hardClearRatio: 1.0, // No hard clear + minPrunableToolChars: 100, + keepLastAssistants: 3, + softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 }, + hardClear: { enabled: false, placeholder: "" }, + }, + }); + + expect(phase2Result.changed).toBe(true); + + // The artifact reference must survive the Phase 2 soft trim (index 2 due to prepended user msg) + const finalMsg = phase2Result.messages[2] as any; + const finalText = extractContentText(finalMsg.content[0]?.content ?? finalMsg.content[0]); + expect(finalText).toContain("artifacts/call_cross.txt"); + }); + + // UC5b: Phase 1 → Phase 2 hard clear also preserves + it("UC5b: artifact ref from Phase 1 truncation survives Phase 2 hard clear", () => { + const bigContent = "HC_DATA_" + "W".repeat(200_000); + + const phase1Result = truncateOversizedToolResults({ + message: { + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_hc", content: bigContent }], + timestamp: Date.now(), + } as any, + contextWindowTokens: 50_000, + saveArtifact: () => "artifacts/call_hc.txt", + }); + + expect(phase1Result.truncated).toBe(true); + + const messages: AgentMessage[] = [ + makeUserMessage("start"), + makeAssistantMessage("calling"), + phase1Result.message, + makeAssistantMessage("a1"), + makeToolResultMessage("s1"), + makeAssistantMessage("a2"), + makeToolResultMessage("s2"), + makeAssistantMessage("a3"), + makeToolResultMessage("s3"), + ]; + + const phase2Result = pruneToolResults({ + messages, + contextWindowTokens: 1_000, + settings: { + softTrimRatio: 0.0, + hardClearRatio: 0.0, // Always hard clear + minPrunableToolChars: 100, + keepLastAssistants: 3, + softTrim: { maxChars: 50, headChars: 20, tailChars: 20 }, + hardClear: { enabled: true, placeholder: "[Cleared]" }, + }, + }); + + expect(phase2Result.changed).toBe(true); + expect(phase2Result.hardCleared).toBeGreaterThan(0); + + const finalMsg = phase2Result.messages[2] as any; + const finalText = extractContentText(finalMsg.content[0]?.content ?? finalMsg.content[0]); + expect(finalText).toContain("[Cleared]"); + expect(finalText).toContain("artifacts/call_hc.txt"); }); }); diff --git a/packages/core/src/agent/session/artifact-integration.test.ts b/packages/core/src/agent/session/artifact-integration.test.ts index 833ef9dd..566e576b 100644 --- a/packages/core/src/agent/session/artifact-integration.test.ts +++ b/packages/core/src/agent/session/artifact-integration.test.ts @@ -1,7 +1,21 @@ /** * E2E Integration Test: Phase 1 — Artifact Storage + Pre-emptive Truncation * - * Tests the full flow: SessionManager → truncateOversizedToolResults → artifact-store + * Tests the full flow: SessionManager.saveMessage() → truncateOversizedToolResults → artifact-store + * + * Test Matrix: + * ┌─────────────────────────────────────────┬──────────────────────┐ + * │ Use Case │ Expected Outcome │ + * ├─────────────────────────────────────────┼──────────────────────┤ + * │ UC1: Oversized tool result │ Truncated + artifact │ + * │ UC2: Small tool result │ Pass-through, no art │ + * │ UC3: Head/tail preservation │ Markers preserved │ + * │ UC4: Multiple results (mixed sizes) │ Selective truncation │ + * │ UC5: Feature toggle disabled │ No truncation │ + * │ UC6: Session reload after truncation │ Truncated content │ + * │ UC7: Truncation marker format │ Correct format │ + * │ UC8: Artifact readable after reload │ Full content intact │ + * └─────────────────────────────────────────┴──────────────────────┘ */ import { describe, it, expect, beforeEach, afterEach } from "vitest"; import { mkdirSync, rmSync, existsSync } from "node:fs"; @@ -45,7 +59,8 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => { rmSync(testDir, { recursive: true, force: true }); }); - it("saves oversized tool result to artifact and truncates in session", async () => { + // UC1: Oversized tool result → truncated in session + artifact saved + it("UC1: oversized tool result is truncated and artifact is saved with full content", async () => { const sm = new SessionManager({ sessionId, baseDir: testDir, @@ -55,24 +70,15 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => { enableToolResultPruning: false, }); - // Create an oversized tool result (> 30% of 100k * 4 chars = 120k chars) const bigContent = "X".repeat(200_000); - const userMessage = { - role: "user" as const, - content: [ - { - type: "tool_result" as const, - tool_use_id: "call_abc123", - content: bigContent, - }, - ], + sm.saveMessage({ + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_abc123", content: bigContent }], timestamp: Date.now(), - }; - - sm.saveMessage(userMessage); + } as any); await sm.flush(); - // Verify: session file has truncated content + // Session file: truncated const entries = readEntries(sessionId, { baseDir: testDir }); const msgEntries = entries.filter((e) => e.type === "message"); expect(msgEntries.length).toBe(1); @@ -83,12 +89,14 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => { expect(savedText).toContain("Tool result truncated"); expect(savedText).toContain("artifacts/"); - // Verify: artifact file exists with full content + // Artifact: full content preserved const artifactContent = readToolResultArtifact(sessionId, "call_abc123", { baseDir: testDir }); expect(artifactContent).toBe(bigContent); + expect(artifactContent!.length).toBe(200_000); }); - it("does NOT create artifact for small tool results", async () => { + // UC2: Small tool result → pass-through, no artifact + it("UC2: small tool result passes through without truncation or artifact", async () => { const sm = new SessionManager({ sessionId, baseDir: testDir, @@ -99,76 +107,60 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => { }); const smallContent = "Small result data"; - const userMessage = { - role: "user" as const, - content: [ - { - type: "tool_result" as const, - tool_use_id: "call_small", - content: smallContent, - }, - ], + sm.saveMessage({ + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_small", content: smallContent }], timestamp: Date.now(), - }; - - sm.saveMessage(userMessage); + } as any); await sm.flush(); - // Verify: session file has full content (no truncation) + // Session file: unchanged content const entries = readEntries(sessionId, { baseDir: testDir }); const saved = (entries.find((e) => e.type === "message") as any).message; const savedText = extractContentText(saved.content[0].content); expect(savedText).toBe(smallContent); - // Verify: no artifacts directory created - const artifactsDir = join(testDir, "sessions", sessionId, "artifacts"); + // No artifacts directory + const artifactsDir = join(testDir, sessionId, "artifacts"); expect(existsSync(artifactsDir)).toBe(false); }); - it("truncated message preserves head and tail of original content", async () => { + // UC3: Head/tail preservation + it("UC3: truncated content preserves identifiable head and tail markers", async () => { const sm = new SessionManager({ sessionId, baseDir: testDir, compactionMode: "tokens", - contextWindowTokens: 50_000, // smaller window → lower threshold + contextWindowTokens: 50_000, enableToolResultTruncation: true, enableToolResultPruning: false, }); - // Create content with identifiable head and tail - const head = "HEAD_MARKER_" + "A".repeat(10_000); + const head = "HEAD_MARKER_START" + "A".repeat(10_000); const middle = "B".repeat(100_000); - const tail = "C".repeat(10_000) + "_TAIL_MARKER"; + const tail = "C".repeat(10_000) + "TAIL_MARKER_END"; const bigContent = head + middle + tail; - const userMessage = { - role: "user" as const, - content: [ - { - type: "tool_result" as const, - tool_use_id: "call_headtail", - content: bigContent, - }, - ], + sm.saveMessage({ + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_ht", content: bigContent }], timestamp: Date.now(), - }; - - sm.saveMessage(userMessage); + } as any); await sm.flush(); const entries = readEntries(sessionId, { baseDir: testDir }); const saved = (entries.find((e) => e.type === "message") as any).message; const savedText = extractContentText(saved.content[0].content); - // Head should be preserved - expect(savedText).toContain("HEAD_MARKER_"); - // Tail should be preserved - expect(savedText).toContain("_TAIL_MARKER"); - // Middle should be truncated + expect(savedText).toContain("HEAD_MARKER_START"); + expect(savedText).toContain("TAIL_MARKER_END"); expect(savedText.length).toBeLessThan(bigContent.length); + // Must also have the truncation marker + expect(savedText).toContain("Tool result truncated"); }); - it("handles multiple tool results in same message", async () => { + // UC4: Multiple tool results — selective truncation + it("UC4: message with mixed-size tool results truncates only oversized ones", async () => { const sm = new SessionManager({ sessionId, baseDir: testDir, @@ -178,69 +170,160 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => { enableToolResultPruning: false, }); - const bigContent1 = "RESULT1_" + "X".repeat(200_000); - const smallContent = "small result"; - const bigContent2 = "RESULT2_" + "Y".repeat(200_000); + const big1 = "BIG1_" + "X".repeat(200_000); + const small = "SMALL_RESULT_INTACT"; + const big2 = "BIG2_" + "Y".repeat(200_000); - const userMessage = { - role: "user" as const, + sm.saveMessage({ + role: "user", content: [ - { type: "tool_result" as const, tool_use_id: "call_big1", content: bigContent1 }, - { type: "tool_result" as const, tool_use_id: "call_small", content: smallContent }, - { type: "tool_result" as const, tool_use_id: "call_big2", content: bigContent2 }, + { type: "tool_result", tool_use_id: "call_big1", content: big1 }, + { type: "tool_result", tool_use_id: "call_sm", content: small }, + { type: "tool_result", tool_use_id: "call_big2", content: big2 }, ], timestamp: Date.now(), - }; - - sm.saveMessage(userMessage); + } as any); await sm.flush(); const entries = readEntries(sessionId, { baseDir: testDir }); const saved = (entries.find((e) => e.type === "message") as any).message; - // Big results should be truncated - const text0 = extractContentText(saved.content[0].content); - const text2 = extractContentText(saved.content[2].content); - expect(text0).toContain("Tool result truncated"); - expect(text2).toContain("Tool result truncated"); + // Big results: truncated + const t0 = extractContentText(saved.content[0].content); + const t2 = extractContentText(saved.content[2].content); + expect(t0).toContain("Tool result truncated"); + expect(t2).toContain("Tool result truncated"); + expect(t0.length).toBeLessThan(big1.length); + expect(t2.length).toBeLessThan(big2.length); - // Small result should be unchanged - const text1 = extractContentText(saved.content[1].content); - expect(text1).toBe(smallContent); + // Small result: intact + const t1 = extractContentText(saved.content[1].content); + expect(t1).toBe(small); - // Both artifacts should exist + // Both artifacts saved with full content const art1 = readToolResultArtifact(sessionId, "call_big1", { baseDir: testDir }); - expect(art1).toContain("RESULT1_"); + expect(art1).toBe(big1); const art2 = readToolResultArtifact(sessionId, "call_big2", { baseDir: testDir }); - expect(art2).toContain("RESULT2_"); + expect(art2).toBe(big2); }); - it("respects enableToolResultTruncation=false", async () => { + // UC5: Feature disabled → no truncation + it("UC5: enableToolResultTruncation=false skips all truncation", async () => { const sm = new SessionManager({ sessionId, baseDir: testDir, compactionMode: "tokens", contextWindowTokens: 50_000, - enableToolResultTruncation: false, // Disabled + enableToolResultTruncation: false, enableToolResultPruning: false, }); const bigContent = "Z".repeat(200_000); - const userMessage = { - role: "user" as const, - content: [ - { type: "tool_result" as const, tool_use_id: "call_noop", content: bigContent }, - ], + sm.saveMessage({ + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_noop", content: bigContent }], timestamp: Date.now(), - }; - - sm.saveMessage(userMessage); + } as any); await sm.flush(); const entries = readEntries(sessionId, { baseDir: testDir }); const saved = (entries.find((e) => e.type === "message") as any).message; - // Should NOT be truncated since feature is disabled const savedText = extractContentText(saved.content[0].content); expect(savedText).toBe(bigContent); + expect(savedText).not.toContain("Tool result truncated"); + }); + + // UC6: Session reload after truncation + it("UC6: loadMessages() returns truncated content after save+reload", async () => { + const sm = new SessionManager({ + sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 100_000, + enableToolResultTruncation: true, + enableToolResultPruning: false, + }); + + const bigContent = "RELOAD_TEST_" + "R".repeat(200_000); + sm.saveMessage({ + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_reload", content: bigContent }], + timestamp: Date.now(), + } as any); + await sm.flush(); + + // Create a fresh SessionManager to reload + const sm2 = new SessionManager({ + sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 100_000, + }); + const messages = sm2.loadMessages(); + expect(messages.length).toBe(1); + + const loaded = messages[0] as any; + const loadedText = extractContentText(loaded.content[0].content); + // Loaded messages should show truncated content (not full) + expect(loadedText).toContain("Tool result truncated"); + expect(loadedText).toContain("artifacts/"); + expect(loadedText.length).toBeLessThan(bigContent.length); + }); + + // UC7: Truncation marker format + it("UC7: truncation marker contains original size and artifact path", async () => { + const sm = new SessionManager({ + sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 100_000, + enableToolResultTruncation: true, + enableToolResultPruning: false, + }); + + const bigContent = "M".repeat(200_000); + sm.saveMessage({ + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_fmt", content: bigContent }], + timestamp: Date.now(), + } as any); + await sm.flush(); + + const entries = readEntries(sessionId, { baseDir: testDir }); + const saved = (entries.find((e) => e.type === "message") as any).message; + const savedText = extractContentText(saved.content[0].content); + + // Marker should include: original size, artifact path, and "read tool" hint + expect(savedText).toMatch(/original 200000 chars/); + expect(savedText).toMatch(/Full result saved to artifacts\/call_fmt\.txt/); + expect(savedText).toContain("read tool"); + }); + + // UC8: Artifact readable via readToolResultArtifact after session operations + it("UC8: artifact is readable by toolCallId and contains exact original content", async () => { + const sm = new SessionManager({ + sessionId, + baseDir: testDir, + compactionMode: "tokens", + contextWindowTokens: 100_000, + enableToolResultTruncation: true, + enableToolResultPruning: false, + }); + + // Use content with specific patterns to verify exact preservation + const specialContent = "START|" + "αβγδ".repeat(50_000) + "|END"; + sm.saveMessage({ + role: "user", + content: [{ type: "tool_result", tool_use_id: "call_exact", content: specialContent }], + timestamp: Date.now(), + } as any); + await sm.flush(); + + const artifact = readToolResultArtifact(sessionId, "call_exact", { baseDir: testDir }); + expect(artifact).toBe(specialContent); + + // Also verify the artifacts directory exists + const artifactsDir = join(testDir, sessionId, "artifacts"); + expect(existsSync(artifactsDir)).toBe(true); }); });