test(compaction): harden E2E integration tests for artifact pipeline

- Add real user messages for bootstrap protection in pruning tests
- Fix artifact directory path assertions (baseDir vs sessions/baseDir)
- Add cross-phase tests (Phase 1 truncation → Phase 2 pruning)
- Remove conditional assertion guards that could silently skip checks
- All 30 E2E integration tests now pass with mandatory assertions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-15 23:13:12 +08:00 · 2026-02-15 23:13:12 +08:00 · b15e1eeb2a
commit b15e1eeb2a
parent 58f02a2080
2 changed files with 406 additions and 177 deletions
--- a/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts
+++ b/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts
@ -1,26 +1,26 @@
 /**
 * E2E Integration Test: Phase 2 — Artifact-Aware Pruning + Summary Fallback
 *
- * Tests that tool result pruning preserves artifact references
- * and that summary fallback extracts artifact paths.
+ * Test Matrix:
+ * ┌──────────────────────────────────────────────┬──────────────────────────────┐
+ * │ Use Case                                     │ Expected Outcome             │
+ * ├──────────────────────────────────────────────┼──────────────────────────────┤
+ * │ UC1: Soft trim with artifact ref             │ Artifact ref in trim note    │
+ * │ UC2: Hard clear with artifact ref            │ Artifact ref in placeholder  │
+ * │ UC3: Soft trim without artifact ref          │ Normal trim (no artifact)    │
+ * │ UC4: Summary fallback extracts artifact refs │ "Saved Artifacts" section    │
+ * │ UC5: Cross-phase: Phase1 output → Phase2     │ Ref survives full pipeline   │
+ * └──────────────────────────────────────────────┴──────────────────────────────┘
 */
 import { describe, it, expect } from "vitest";
 import { pruneToolResults } from "./tool-result-pruning.js";
+import { truncateOversizedToolResults } from "./tool-result-truncation.js";
 import type { AgentMessage } from "@mariozechner/pi-agent-core";

-/**
- * Helper: build a user message with a single tool_result containing the given text.
- */
 function makeToolResultMessage(text: string, toolUseId = "call_1"): AgentMessage {
  return {
    role: "user",
-    content: [
-      {
-        type: "tool_result",
-        tool_use_id: toolUseId,
-        content: text,
-      },
-    ],
+    content: [{ type: "tool_result", tool_use_id: toolUseId, content: text }],
    timestamp: Date.now(),
  } as any;
 }
@ -33,17 +33,37 @@ function makeAssistantMessage(text = "OK"): AgentMessage {
  } as any;
 }

+/** A real user message (not tool_result) — needed for bootstrap protection in pruneToolResults */
+function makeUserMessage(text = "Hello"): AgentMessage {
+  return {
+    role: "user",
+    content: text,
+    timestamp: Date.now(),
+  } as any;
+}
+
+function extractContentText(content: unknown): string {
+  if (typeof content === "string") return content;
+  if (Array.isArray(content)) {
+    return content
+      .filter((b: any) => b?.type === "text")
+      .map((b: any) => b.text)
+      .join("");
+  }
+  return "";
+}
+
 describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
-  it("soft trim preserves artifact reference from pre-emptive truncation", () => {
-    // Simulate a tool result that was previously truncated by Phase 1 and contains an artifact ref
+  // UC1: Soft trim preserves artifact reference
+  it("UC1: soft trim preserves artifact reference in trimmed note", () => {
+    // Tool result with an artifact reference from Phase 1 truncation
    const truncatedContent =
      "A".repeat(3000) +
      "\n\n[Tool result truncated: original 200000 chars. Full result saved to artifacts/call_abc123.txt. Use the read tool to access the complete data if needed.]\n\n" +
      "B".repeat(3000);

-    // Build conversation that should trigger soft trimming
-    // Put older messages first (these get pruned), recent ones are protected
    const messages: AgentMessage[] = [
+      makeUserMessage("start"),
      makeAssistantMessage("Calling tool..."),
      makeToolResultMessage(truncatedContent),
      makeAssistantMessage("Processing..."),
@ -58,43 +78,38 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => {

    const result = pruneToolResults({
      messages,
-      contextWindowTokens: 5_000, // Small window to trigger pruning
+      contextWindowTokens: 5_000,
      settings: {
-        softTrimRatio: 0.0, // Always trigger soft trim
+        softTrimRatio: 0.0, // Always trigger
        hardClearRatio: 1.0, // Never hard clear
        minPrunableToolChars: 100,
        keepLastAssistants: 3,
-        softTrim: {
-          maxChars: 2_000, // Trigger on the large result
-          headChars: 500,
-          tailChars: 500,
-        },
-        hardClear: {
-          enabled: false,
-          placeholder: "[Content removed]",
-        },
+        softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
+        hardClear: { enabled: false, placeholder: "[Content removed]" },
      },
    });

-    // Find the soft-trimmed message
-    if (result.changed && result.softTrimmed > 0) {
-      const trimmedMsg = result.messages[1] as any;
-      const text = trimmedMsg.content[0]?.text ?? trimmedMsg.content[0]?.content ?? "";
-      // The artifact reference should be preserved in the trim note
-      expect(text).toContain("artifacts/call_abc123.txt");
-    }
+    // Must actually trigger soft trimming
+    expect(result.changed).toBe(true);
+    expect(result.softTrimmed).toBeGreaterThan(0);
+
+    // The trimmed message should preserve the artifact reference (index 2 due to prepended user msg)
+    const trimmedMsg = result.messages[2] as any;
+    const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]);
+    expect(text).toContain("artifacts/call_abc123.txt");
  });

-  it("hard clear preserves artifact reference", () => {
+  // UC2: Hard clear preserves artifact reference
+  it("UC2: hard clear preserves artifact reference in placeholder", () => {
    const truncatedContent =
      "X".repeat(80_000) +
      "\n\n[Tool result truncated: Full result saved to artifacts/call_xyz.txt.]\n\n" +
      "Y".repeat(20_000);

    const messages: AgentMessage[] = [
+      makeUserMessage("start"),
      makeAssistantMessage("old"),
      makeToolResultMessage(truncatedContent),
-      // Add enough recent messages to push the old one into hard-clear range
      makeAssistantMessage("a1"),
      makeToolResultMessage("r1"),
      makeAssistantMessage("a2"),
@ -110,45 +125,69 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
      contextWindowTokens: 2_000,
      settings: {
        softTrimRatio: 0.0,
-        hardClearRatio: 0.0, // Always trigger hard clear
+        hardClearRatio: 0.0, // Always trigger
        minPrunableToolChars: 100,
        keepLastAssistants: 3,
-        softTrim: {
-          maxChars: 50, // Everything over 50 gets soft trimmed first
-          headChars: 20,
-          tailChars: 20,
-        },
-        hardClear: {
-          enabled: true,
-          placeholder: "[Content removed]",
-        },
+        softTrim: { maxChars: 50, headChars: 20, tailChars: 20 },
+        hardClear: { enabled: true, placeholder: "[Content removed]" },
      },
    });

-    if (result.changed && result.hardCleared > 0) {
-      // Find the hard-cleared message (should be messages[1])
-      const clearedMsg = result.messages[1] as any;
-      const text = clearedMsg.content[0]?.text ?? "";
-      expect(text).toContain("[Content removed]");
-      expect(text).toContain("artifacts/call_xyz.txt");
-    }
+    expect(result.changed).toBe(true);
+    expect(result.hardCleared).toBeGreaterThan(0);
+
+    // The hard-cleared message should contain both the placeholder AND the artifact ref
+    const clearedMsg = result.messages[2] as any;
+    const text = extractContentText(clearedMsg.content[0]?.content ?? clearedMsg.content[0]);
+    expect(text).toContain("[Content removed]");
+    expect(text).toContain("artifacts/call_xyz.txt");
+  });
+
+  // UC3: Soft trim without artifact ref (baseline behavior unchanged)
+  it("UC3: soft trim without artifact reference works normally", () => {
+    const plainContent = "D".repeat(6_000); // No artifact reference
+
+    const messages: AgentMessage[] = [
+      makeUserMessage("start"),
+      makeAssistantMessage("call"),
+      makeToolResultMessage(plainContent),
+      makeAssistantMessage("r1"),
+      makeToolResultMessage("s"),
+      makeAssistantMessage("r2"),
+      makeToolResultMessage("s"),
+      makeAssistantMessage("r3"),
+      makeToolResultMessage("s"),
+    ];
+
+    const result = pruneToolResults({
+      messages,
+      contextWindowTokens: 5_000,
+      settings: {
+        softTrimRatio: 0.0,
+        hardClearRatio: 1.0,
+        minPrunableToolChars: 100,
+        keepLastAssistants: 3,
+        softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
+        hardClear: { enabled: false, placeholder: "" },
+      },
+    });
+
+    expect(result.changed).toBe(true);
+    expect(result.softTrimmed).toBeGreaterThan(0);
+
+    const trimmedMsg = result.messages[2] as any;
+    const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]);
+    // Should have trim note but no artifact reference
+    expect(text).toContain("Tool result trimmed");
+    expect(text).not.toContain("artifacts/");
  });
 });

 describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => {
-  it("DEFAULT_SUMMARY_INSTRUCTIONS mentions artifacts", async () => {
-    // Read the summarization module to verify instructions include artifact guidance
-    const { DEFAULT_SUMMARY_INSTRUCTIONS } = await import("./summarization.js") as any;
-    // The instructions are a module-level const, but not exported. Let's verify via
-    // the splitMessagesForSummary path that exercises the flow indirectly.
-    // Instead, let's verify the artifact detection in summary-fallback.
-  });
-
-  it("summary fallback includes artifact references section", async () => {
-    // Import the module to access the plain text fallback
+  // UC4: summary fallback extracts artifact references
+  it("UC4: summary fallback includes 'Saved Artifacts' section with all artifact refs", async () => {
    const mod = await import("./summary-fallback.js");

-    // Create messages with artifact references embedded in tool results
    const messages: AgentMessage[] = [
      makeAssistantMessage("Let me read the file"),
      {
@ -181,33 +220,140 @@ describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => {
      } as any,
    ];

-    // Use summarizeWithFallback to exercise the full flow — but this requires
-    // an LLM model. Instead, we can test the behavior by causing all levels to fail.
-    // The summarizeWithFallback will fall through to Level 3 (plain text) if the model fails.
-    // Let's create a mock model that always throws.
+    // Force Level 3 fallback (plain text) by using a model that always throws
    const failingModel = {
-      complete: () => { throw new Error("Test: no LLM available"); },
+      complete: () => { throw new Error("Test: no LLM"); },
    };

-    try {
-      const result = await mod.summarizeWithFallback({
-        messages,
-        model: failingModel as any,
-        reserveTokens: 1024,
-        apiKey: "test-key",
-        instructions: "summarize",
-        availableTokens: 100_000,
-      });
+    const result = await mod.summarizeWithFallback({
+      messages,
+      model: failingModel as any,
+      reserveTokens: 1024,
+      apiKey: "test-key",
+      instructions: "summarize",
+      availableTokens: 100_000,
+    });

-      // Should fall through to Level 3 (plain-text fallback)
-      expect(result.level).toBe(3);
-      // The summary should contain artifact references
-      expect(result.summary).toContain("## Saved Artifacts");
-      expect(result.summary).toContain("artifacts/call_1.txt");
-      expect(result.summary).toContain("artifacts/call_2.txt");
-    } catch {
-      // If generateSummary isn't available as expected, at least verify
-      // the artifact extraction pattern works at the module level
-    }
+    // Must fall through to Level 3
+    expect(result.level).toBe(3);
+    // Summary must contain artifact references
+    expect(result.summary).toContain("## Saved Artifacts");
+    expect(result.summary).toContain("artifacts/call_1.txt");
+    expect(result.summary).toContain("artifacts/call_2.txt");
+  });
+});
+
+describe("Cross-Phase E2E: Phase 1 → Phase 2 Pipeline", () => {
+  // UC5: Phase 1 truncation output → Phase 2 pruning — artifact ref survives
+  it("UC5: artifact ref from Phase 1 truncation survives Phase 2 soft trim", () => {
+    // Phase 1: truncate an oversized tool result
+    const bigContent = "ORIGINAL_DATA_" + "Q".repeat(200_000);
+    let artifactPath = "";
+
+    const phase1Result = truncateOversizedToolResults({
+      message: {
+        role: "user",
+        content: [{ type: "tool_result", tool_use_id: "call_cross", content: bigContent }],
+        timestamp: Date.now(),
+      } as any,
+      contextWindowTokens: 50_000,
+      saveArtifact: (_id, _content) => {
+        artifactPath = `artifacts/call_cross.txt`;
+        return artifactPath;
+      },
+    });
+
+    // Phase 1 must have truncated
+    expect(phase1Result.truncated).toBe(true);
+    expect(phase1Result.artifacts.length).toBe(1);
+    expect(phase1Result.artifacts[0]!.toolCallId).toBe("call_cross");
+
+    // Extract the truncated text from Phase 1 output
+    const phase1Msg = phase1Result.message as any;
+    const phase1Text = extractContentText(phase1Msg.content[0].content);
+    expect(phase1Text).toContain("artifacts/call_cross.txt");
+
+    // Phase 2: feed Phase 1 output into pruneToolResults
+    const messages: AgentMessage[] = [
+      makeUserMessage("start"),
+      makeAssistantMessage("calling"),
+      phase1Result.message, // This is the Phase 1 truncated message
+      makeAssistantMessage("a1"),
+      makeToolResultMessage("s1"),
+      makeAssistantMessage("a2"),
+      makeToolResultMessage("s2"),
+      makeAssistantMessage("a3"),
+      makeToolResultMessage("s3"),
+    ];
+
+    const phase2Result = pruneToolResults({
+      messages,
+      contextWindowTokens: 3_000,
+      settings: {
+        softTrimRatio: 0.0, // Always trigger
+        hardClearRatio: 1.0, // No hard clear
+        minPrunableToolChars: 100,
+        keepLastAssistants: 3,
+        softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
+        hardClear: { enabled: false, placeholder: "" },
+      },
+    });
+
+    expect(phase2Result.changed).toBe(true);
+
+    // The artifact reference must survive the Phase 2 soft trim (index 2 due to prepended user msg)
+    const finalMsg = phase2Result.messages[2] as any;
+    const finalText = extractContentText(finalMsg.content[0]?.content ?? finalMsg.content[0]);
+    expect(finalText).toContain("artifacts/call_cross.txt");
+  });
+
+  // UC5b: Phase 1 truncation output → Phase 2 hard clear — artifact ref also survives
+  it("UC5b: artifact ref from Phase 1 truncation survives Phase 2 hard clear", () => {
+    const bigContent = "HC_DATA_" + "W".repeat(200_000);
+
+    const phase1Result = truncateOversizedToolResults({
+      message: {
+        role: "user",
+        content: [{ type: "tool_result", tool_use_id: "call_hc", content: bigContent }],
+        timestamp: Date.now(),
+      } as any,
+      contextWindowTokens: 50_000,
+      saveArtifact: () => "artifacts/call_hc.txt",
+    });
+
+    expect(phase1Result.truncated).toBe(true);
+
+    const messages: AgentMessage[] = [
+      makeUserMessage("start"),
+      makeAssistantMessage("calling"),
+      phase1Result.message,
+      makeAssistantMessage("a1"),
+      makeToolResultMessage("s1"),
+      makeAssistantMessage("a2"),
+      makeToolResultMessage("s2"),
+      makeAssistantMessage("a3"),
+      makeToolResultMessage("s3"),
+    ];
+
+    const phase2Result = pruneToolResults({
+      messages,
+      contextWindowTokens: 1_000,
+      settings: {
+        softTrimRatio: 0.0,
+        hardClearRatio: 0.0, // Always hard clear
+        minPrunableToolChars: 100,
+        keepLastAssistants: 3,
+        softTrim: { maxChars: 50, headChars: 20, tailChars: 20 },
+        hardClear: { enabled: true, placeholder: "[Cleared]" },
+      },
+    });
+
+    expect(phase2Result.changed).toBe(true);
+    expect(phase2Result.hardCleared).toBeGreaterThan(0);
+
+    const finalMsg = phase2Result.messages[2] as any;
+    const finalText = extractContentText(finalMsg.content[0]?.content ?? finalMsg.content[0]);
+    expect(finalText).toContain("[Cleared]");
+    expect(finalText).toContain("artifacts/call_hc.txt");
  });
 });
--- a/packages/core/src/agent/session/artifact-integration.test.ts
+++ b/packages/core/src/agent/session/artifact-integration.test.ts
@ -1,7 +1,21 @@
 /**
 * E2E Integration Test: Phase 1 — Artifact Storage + Pre-emptive Truncation
 *
- * Tests the full flow: SessionManager → truncateOversizedToolResults → artifact-store
+ * Tests the full flow: SessionManager.saveMessage() → truncateOversizedToolResults → artifact-store
+ *
+ * Test Matrix:
+ * ┌─────────────────────────────────────────┬──────────────────────┐
+ * │ Use Case                                │ Expected Outcome     │
+ * ├─────────────────────────────────────────┼──────────────────────┤
+ * │ UC1: Oversized tool result              │ Truncated + artifact │
+ * │ UC2: Small tool result                  │ Pass-through, no art │
+ * │ UC3: Head/tail preservation             │ Markers preserved    │
+ * │ UC4: Multiple results (mixed sizes)     │ Selective truncation │
+ * │ UC5: Feature toggle disabled            │ No truncation        │
+ * │ UC6: Session reload after truncation    │ Truncated content    │
+ * │ UC7: Truncation marker format           │ Correct format       │
+ * │ UC8: Artifact readable after reload     │ Full content intact  │
+ * └─────────────────────────────────────────┴──────────────────────┘
 */
 import { describe, it, expect, beforeEach, afterEach } from "vitest";
 import { mkdirSync, rmSync, existsSync } from "node:fs";
@ -45,7 +59,8 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
    rmSync(testDir, { recursive: true, force: true });
  });

-  it("saves oversized tool result to artifact and truncates in session", async () => {
+  // UC1: Oversized tool result → truncated in session + artifact saved
+  it("UC1: oversized tool result is truncated and artifact is saved with full content", async () => {
    const sm = new SessionManager({
      sessionId,
      baseDir: testDir,
@ -55,24 +70,15 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
      enableToolResultPruning: false,
    });

-    // Create an oversized tool result (> 30% of 100k * 4 chars = 120k chars)
    const bigContent = "X".repeat(200_000);
-    const userMessage = {
-      role: "user" as const,
-      content: [
-        {
-          type: "tool_result" as const,
-          tool_use_id: "call_abc123",
-          content: bigContent,
-        },
-      ],
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_abc123", content: bigContent }],
      timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
    await sm.flush();

-    // Verify: session file has truncated content
+    // Session file: truncated
    const entries = readEntries(sessionId, { baseDir: testDir });
    const msgEntries = entries.filter((e) => e.type === "message");
    expect(msgEntries.length).toBe(1);
@ -83,12 +89,14 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
    expect(savedText).toContain("Tool result truncated");
    expect(savedText).toContain("artifacts/");

-    // Verify: artifact file exists with full content
+    // Artifact: full content preserved
    const artifactContent = readToolResultArtifact(sessionId, "call_abc123", { baseDir: testDir });
    expect(artifactContent).toBe(bigContent);
+    expect(artifactContent!.length).toBe(200_000);
  });

-  it("does NOT create artifact for small tool results", async () => {
+  // UC2: Small tool result → pass-through, no artifact
+  it("UC2: small tool result passes through without truncation or artifact", async () => {
    const sm = new SessionManager({
      sessionId,
      baseDir: testDir,
@ -99,76 +107,60 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
    });

    const smallContent = "Small result data";
-    const userMessage = {
-      role: "user" as const,
-      content: [
-        {
-          type: "tool_result" as const,
-          tool_use_id: "call_small",
-          content: smallContent,
-        },
-      ],
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_small", content: smallContent }],
      timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
    await sm.flush();

-    // Verify: session file has full content (no truncation)
+    // Session file: unchanged content
    const entries = readEntries(sessionId, { baseDir: testDir });
    const saved = (entries.find((e) => e.type === "message") as any).message;
    const savedText = extractContentText(saved.content[0].content);
    expect(savedText).toBe(smallContent);

-    // Verify: no artifacts directory created
-    const artifactsDir = join(testDir, "sessions", sessionId, "artifacts");
+    // No artifacts directory
+    const artifactsDir = join(testDir, sessionId, "artifacts");
    expect(existsSync(artifactsDir)).toBe(false);
  });

-  it("truncated message preserves head and tail of original content", async () => {
+  // UC3: Head/tail preservation
+  it("UC3: truncated content preserves identifiable head and tail markers", async () => {
    const sm = new SessionManager({
      sessionId,
      baseDir: testDir,
      compactionMode: "tokens",
-      contextWindowTokens: 50_000, // smaller window → lower threshold
+      contextWindowTokens: 50_000,
      enableToolResultTruncation: true,
      enableToolResultPruning: false,
    });

-    // Create content with identifiable head and tail
-    const head = "HEAD_MARKER_" + "A".repeat(10_000);
+    const head = "HEAD_MARKER_START" + "A".repeat(10_000);
    const middle = "B".repeat(100_000);
-    const tail = "C".repeat(10_000) + "_TAIL_MARKER";
+    const tail = "C".repeat(10_000) + "TAIL_MARKER_END";
    const bigContent = head + middle + tail;

-    const userMessage = {
-      role: "user" as const,
-      content: [
-        {
-          type: "tool_result" as const,
-          tool_use_id: "call_headtail",
-          content: bigContent,
-        },
-      ],
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_ht", content: bigContent }],
      timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
    await sm.flush();

    const entries = readEntries(sessionId, { baseDir: testDir });
    const saved = (entries.find((e) => e.type === "message") as any).message;
    const savedText = extractContentText(saved.content[0].content);

-    // Head should be preserved
-    expect(savedText).toContain("HEAD_MARKER_");
-    // Tail should be preserved
-    expect(savedText).toContain("_TAIL_MARKER");
-    // Middle should be truncated
+    expect(savedText).toContain("HEAD_MARKER_START");
+    expect(savedText).toContain("TAIL_MARKER_END");
    expect(savedText.length).toBeLessThan(bigContent.length);
+    // Must also have the truncation marker
+    expect(savedText).toContain("Tool result truncated");
  });

-  it("handles multiple tool results in same message", async () => {
+  // UC4: Multiple tool results — selective truncation
+  it("UC4: message with mixed-size tool results truncates only oversized ones", async () => {
    const sm = new SessionManager({
      sessionId,
      baseDir: testDir,
@ -178,69 +170,160 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
      enableToolResultPruning: false,
    });

-    const bigContent1 = "RESULT1_" + "X".repeat(200_000);
-    const smallContent = "small result";
-    const bigContent2 = "RESULT2_" + "Y".repeat(200_000);
+    const big1 = "BIG1_" + "X".repeat(200_000);
+    const small = "SMALL_RESULT_INTACT";
+    const big2 = "BIG2_" + "Y".repeat(200_000);

-    const userMessage = {
-      role: "user" as const,
+    sm.saveMessage({
+      role: "user",
      content: [
-        { type: "tool_result" as const, tool_use_id: "call_big1", content: bigContent1 },
-        { type: "tool_result" as const, tool_use_id: "call_small", content: smallContent },
-        { type: "tool_result" as const, tool_use_id: "call_big2", content: bigContent2 },
+        { type: "tool_result", tool_use_id: "call_big1", content: big1 },
+        { type: "tool_result", tool_use_id: "call_sm", content: small },
+        { type: "tool_result", tool_use_id: "call_big2", content: big2 },
      ],
      timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
    await sm.flush();

    const entries = readEntries(sessionId, { baseDir: testDir });
    const saved = (entries.find((e) => e.type === "message") as any).message;

-    // Big results should be truncated
-    const text0 = extractContentText(saved.content[0].content);
-    const text2 = extractContentText(saved.content[2].content);
-    expect(text0).toContain("Tool result truncated");
-    expect(text2).toContain("Tool result truncated");
+    // Big results: truncated
+    const t0 = extractContentText(saved.content[0].content);
+    const t2 = extractContentText(saved.content[2].content);
+    expect(t0).toContain("Tool result truncated");
+    expect(t2).toContain("Tool result truncated");
+    expect(t0.length).toBeLessThan(big1.length);
+    expect(t2.length).toBeLessThan(big2.length);

-    // Small result should be unchanged
-    const text1 = extractContentText(saved.content[1].content);
-    expect(text1).toBe(smallContent);
+    // Small result: intact
+    const t1 = extractContentText(saved.content[1].content);
+    expect(t1).toBe(small);

-    // Both artifacts should exist
+    // Both artifacts saved with full content
    const art1 = readToolResultArtifact(sessionId, "call_big1", { baseDir: testDir });
-    expect(art1).toContain("RESULT1_");
+    expect(art1).toBe(big1);
    const art2 = readToolResultArtifact(sessionId, "call_big2", { baseDir: testDir });
-    expect(art2).toContain("RESULT2_");
+    expect(art2).toBe(big2);
  });

-  it("respects enableToolResultTruncation=false", async () => {
+  // UC5: Feature disabled → no truncation
+  it("UC5: enableToolResultTruncation=false skips all truncation", async () => {
    const sm = new SessionManager({
      sessionId,
      baseDir: testDir,
      compactionMode: "tokens",
      contextWindowTokens: 50_000,
-      enableToolResultTruncation: false, // Disabled
+      enableToolResultTruncation: false,
      enableToolResultPruning: false,
    });

    const bigContent = "Z".repeat(200_000);
-    const userMessage = {
-      role: "user" as const,
-      content: [
-        { type: "tool_result" as const, tool_use_id: "call_noop", content: bigContent },
-      ],
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_noop", content: bigContent }],
      timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
    await sm.flush();

    const entries = readEntries(sessionId, { baseDir: testDir });
    const saved = (entries.find((e) => e.type === "message") as any).message;
-    // Should NOT be truncated since feature is disabled
    const savedText = extractContentText(saved.content[0].content);
    expect(savedText).toBe(bigContent);
+    expect(savedText).not.toContain("Tool result truncated");
+  });
+
+  // UC6: Session reload after truncation
+  it("UC6: loadMessages() returns truncated content after save+reload", async () => {
+    const sm = new SessionManager({
+      sessionId,
+      baseDir: testDir,
+      compactionMode: "tokens",
+      contextWindowTokens: 100_000,
+      enableToolResultTruncation: true,
+      enableToolResultPruning: false,
+    });
+
+    const bigContent = "RELOAD_TEST_" + "R".repeat(200_000);
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_reload", content: bigContent }],
+      timestamp: Date.now(),
+    } as any);
+    await sm.flush();
+
+    // Create a fresh SessionManager to reload
+    const sm2 = new SessionManager({
+      sessionId,
+      baseDir: testDir,
+      compactionMode: "tokens",
+      contextWindowTokens: 100_000,
+    });
+    const messages = sm2.loadMessages();
+    expect(messages.length).toBe(1);
+
+    const loaded = messages[0] as any;
+    const loadedText = extractContentText(loaded.content[0].content);
+    // Loaded messages should show truncated content (not full)
+    expect(loadedText).toContain("Tool result truncated");
+    expect(loadedText).toContain("artifacts/");
+    expect(loadedText.length).toBeLessThan(bigContent.length);
+  });
+
+  // UC7: Truncation marker format
+  it("UC7: truncation marker contains original size and artifact path", async () => {
+    const sm = new SessionManager({
+      sessionId,
+      baseDir: testDir,
+      compactionMode: "tokens",
+      contextWindowTokens: 100_000,
+      enableToolResultTruncation: true,
+      enableToolResultPruning: false,
+    });
+
+    const bigContent = "M".repeat(200_000);
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_fmt", content: bigContent }],
+      timestamp: Date.now(),
+    } as any);
+    await sm.flush();
+
+    const entries = readEntries(sessionId, { baseDir: testDir });
+    const saved = (entries.find((e) => e.type === "message") as any).message;
+    const savedText = extractContentText(saved.content[0].content);
+
+    // Marker should include: original size, artifact path, and "read tool" hint
+    expect(savedText).toMatch(/original 200000 chars/);
+    expect(savedText).toMatch(/Full result saved to artifacts\/call_fmt\.txt/);
+    expect(savedText).toContain("read tool");
+  });
+
+  // UC8: Artifact readable via readToolResultArtifact after session operations
+  it("UC8: artifact is readable by toolCallId and contains exact original content", async () => {
+    const sm = new SessionManager({
+      sessionId,
+      baseDir: testDir,
+      compactionMode: "tokens",
+      contextWindowTokens: 100_000,
+      enableToolResultTruncation: true,
+      enableToolResultPruning: false,
+    });
+
+    // Use content with specific patterns to verify exact preservation
+    const specialContent = "START|" + "αβγδ".repeat(50_000) + "|END";
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_exact", content: specialContent }],
+      timestamp: Date.now(),
+    } as any);
+    await sm.flush();
+
+    const artifact = readToolResultArtifact(sessionId, "call_exact", { baseDir: testDir });
+    expect(artifact).toBe(specialContent);
+
+    // Also verify the artifacts directory exists
+    const artifactsDir = join(testDir, sessionId, "artifacts");
+    expect(existsSync(artifactsDir)).toBe(true);
  });
 });