From b15e1eeb2ac21edc52a074935cd23aab0f322ad5 Mon Sep 17 00:00:00 2001
From: Jiayuan Zhang <forrestchang7@gmail.com>
Date: Sun, 15 Feb 2026 23:13:12 +0800
Subject: [PATCH] test(compaction): harden E2E integration tests for artifact
 pipeline
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add real user messages for bootstrap protection in pruning tests
- Fix artifact directory path assertions: artifacts live under
  <baseDir>/<sessionId>/artifacts, not <baseDir>/sessions/<sessionId>/artifacts
- Add cross-phase tests (Phase 1 truncation → Phase 2 pruning)
- Remove conditional assertion guards, a swallowing try/catch, and an
  assertion-less placeholder test that could silently skip checks
- All 30 E2E integration tests now pass with mandatory assertions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../artifact-pruning-integration.test.ts      | 322 +++++++++++++-----
 .../session/artifact-integration.test.ts      | 261 +++++++++-----
 2 files changed, 406 insertions(+), 177 deletions(-)

diff --git a/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts b/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts
index a642c052..152aae4d 100644
--- a/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts
+++ b/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts
@@ -1,26 +1,26 @@
 /**
  * E2E Integration Test: Phase 2 — Artifact-Aware Pruning + Summary Fallback
  *
- * Tests that tool result pruning preserves artifact references
- * and that summary fallback extracts artifact paths.
+ * Test Matrix:
+ * ┌──────────────────────────────────────────────┬──────────────────────────────┐
+ * │ Use Case                                     │ Expected Outcome             │
+ * ├──────────────────────────────────────────────┼──────────────────────────────┤
+ * │ UC1: Soft trim with artifact ref             │ Artifact ref in trim note    │
+ * │ UC2: Hard clear with artifact ref            │ Artifact ref in placeholder  │
+ * │ UC3: Soft trim without artifact ref          │ Normal trim (no artifact)    │
+ * │ UC4: Summary fallback extracts artifact refs │ "Saved Artifacts" section    │
+ * │ UC5: Cross-phase: Phase1 output → Phase2     │ Ref survives full pipeline   │
+ * └──────────────────────────────────────────────┴──────────────────────────────┘
  */
 import { describe, it, expect } from "vitest";
 import { pruneToolResults } from "./tool-result-pruning.js";
+import { truncateOversizedToolResults } from "./tool-result-truncation.js";
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 
-/**
- * Helper: build a user message with a single tool_result containing the given text.
- */
 function makeToolResultMessage(text: string, toolUseId = "call_1"): AgentMessage {
   return {
     role: "user",
-    content: [
-      {
-        type: "tool_result",
-        tool_use_id: toolUseId,
-        content: text,
-      },
-    ],
+    content: [{ type: "tool_result", tool_use_id: toolUseId, content: text }],
     timestamp: Date.now(),
   } as any;
 }
@@ -33,17 +33,37 @@ function makeAssistantMessage(text = "OK"): AgentMessage {
   } as any;
 }
 
+/** A real user message (not tool_result) — needed for bootstrap protection in pruneToolResults */
+function makeUserMessage(text = "Hello"): AgentMessage {
+  return {
+    role: "user",
+    content: text,
+    timestamp: Date.now(),
+  } as any;
+}
+
+function extractContentText(content: unknown): string {
+  if (typeof content === "string") return content;
+  if (Array.isArray(content)) {
+    return content
+      .filter((b: any) => b?.type === "text")
+      .map((b: any) => b.text)
+      .join("");
+  }
+  return "";
+}
+
 describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
-  it("soft trim preserves artifact reference from pre-emptive truncation", () => {
-    // Simulate a tool result that was previously truncated by Phase 1 and contains an artifact ref
+  // UC1: Soft trim preserves artifact reference
+  it("UC1: soft trim preserves artifact reference in trimmed note", () => {
+    // Tool result with an artifact reference from Phase 1 truncation
     const truncatedContent =
       "A".repeat(3000) +
       "\n\n[Tool result truncated: original 200000 chars. Full result saved to artifacts/call_abc123.txt. Use the read tool to access the complete data if needed.]\n\n" +
       "B".repeat(3000);
 
-    // Build conversation that should trigger soft trimming
-    // Put older messages first (these get pruned), recent ones are protected
     const messages: AgentMessage[] = [
+      makeUserMessage("start"),
       makeAssistantMessage("Calling tool..."),
       makeToolResultMessage(truncatedContent),
       makeAssistantMessage("Processing..."),
@@ -58,43 +78,38 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
 
     const result = pruneToolResults({
       messages,
-      contextWindowTokens: 5_000, // Small window to trigger pruning
+      contextWindowTokens: 5_000,
       settings: {
-        softTrimRatio: 0.0, // Always trigger soft trim
+        softTrimRatio: 0.0, // Always trigger
         hardClearRatio: 1.0, // Never hard clear
         minPrunableToolChars: 100,
         keepLastAssistants: 3,
-        softTrim: {
-          maxChars: 2_000, // Trigger on the large result
-          headChars: 500,
-          tailChars: 500,
-        },
-        hardClear: {
-          enabled: false,
-          placeholder: "[Content removed]",
-        },
+        softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
+        hardClear: { enabled: false, placeholder: "[Content removed]" },
       },
     });
 
-    // Find the soft-trimmed message
-    if (result.changed && result.softTrimmed > 0) {
-      const trimmedMsg = result.messages[1] as any;
-      const text = trimmedMsg.content[0]?.text ?? trimmedMsg.content[0]?.content ?? "";
-      // The artifact reference should be preserved in the trim note
-      expect(text).toContain("artifacts/call_abc123.txt");
-    }
+    // Must actually trigger soft trimming
+    expect(result.changed).toBe(true);
+    expect(result.softTrimmed).toBeGreaterThan(0);
+
+    // The trimmed message should preserve the artifact reference (index 2 due to prepended user msg)
+    const trimmedMsg = result.messages[2] as any;
+    const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]);
+    expect(text).toContain("artifacts/call_abc123.txt");
   });
 
-  it("hard clear preserves artifact reference", () => {
+  // UC2: Hard clear preserves artifact reference
+  it("UC2: hard clear preserves artifact reference in placeholder", () => {
     const truncatedContent =
       "X".repeat(80_000) +
       "\n\n[Tool result truncated: Full result saved to artifacts/call_xyz.txt.]\n\n" +
       "Y".repeat(20_000);
 
     const messages: AgentMessage[] = [
+      makeUserMessage("start"),
       makeAssistantMessage("old"),
       makeToolResultMessage(truncatedContent),
-      // Add enough recent messages to push the old one into hard-clear range
       makeAssistantMessage("a1"),
       makeToolResultMessage("r1"),
       makeAssistantMessage("a2"),
@@ -110,45 +125,69 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
       contextWindowTokens: 2_000,
       settings: {
         softTrimRatio: 0.0,
-        hardClearRatio: 0.0, // Always trigger hard clear
+        hardClearRatio: 0.0, // Always trigger
         minPrunableToolChars: 100,
         keepLastAssistants: 3,
-        softTrim: {
-          maxChars: 50, // Everything over 50 gets soft trimmed first
-          headChars: 20,
-          tailChars: 20,
-        },
-        hardClear: {
-          enabled: true,
-          placeholder: "[Content removed]",
-        },
+        softTrim: { maxChars: 50, headChars: 20, tailChars: 20 },
+        hardClear: { enabled: true, placeholder: "[Content removed]" },
       },
     });
 
-    if (result.changed && result.hardCleared > 0) {
-      // Find the hard-cleared message (should be messages[1])
-      const clearedMsg = result.messages[1] as any;
-      const text = clearedMsg.content[0]?.text ?? "";
-      expect(text).toContain("[Content removed]");
-      expect(text).toContain("artifacts/call_xyz.txt");
-    }
+    expect(result.changed).toBe(true);
+    expect(result.hardCleared).toBeGreaterThan(0);
+
+    // The hard-cleared message should contain both the placeholder AND the artifact ref
+    const clearedMsg = result.messages[2] as any;
+    const text = extractContentText(clearedMsg.content[0]?.content ?? clearedMsg.content[0]);
+    expect(text).toContain("[Content removed]");
+    expect(text).toContain("artifacts/call_xyz.txt");
+  });
+
+  // UC3: Soft trim without artifact ref (baseline behavior unchanged)
+  it("UC3: soft trim without artifact reference works normally", () => {
+    const plainContent = "D".repeat(6_000); // No artifact reference
+
+    const messages: AgentMessage[] = [
+      makeUserMessage("start"),
+      makeAssistantMessage("call"),
+      makeToolResultMessage(plainContent),
+      makeAssistantMessage("r1"),
+      makeToolResultMessage("s"),
+      makeAssistantMessage("r2"),
+      makeToolResultMessage("s"),
+      makeAssistantMessage("r3"),
+      makeToolResultMessage("s"),
+    ];
+
+    const result = pruneToolResults({
+      messages,
+      contextWindowTokens: 5_000,
+      settings: {
+        softTrimRatio: 0.0,
+        hardClearRatio: 1.0,
+        minPrunableToolChars: 100,
+        keepLastAssistants: 3,
+        softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
+        hardClear: { enabled: false, placeholder: "" },
+      },
+    });
+
+    expect(result.changed).toBe(true);
+    expect(result.softTrimmed).toBeGreaterThan(0);
+
+    const trimmedMsg = result.messages[2] as any;
+    const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]);
+    // Should have trim note but no artifact reference
+    expect(text).toContain("Tool result trimmed");
+    expect(text).not.toContain("artifacts/");
   });
 });
 
 describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => {
-  it("DEFAULT_SUMMARY_INSTRUCTIONS mentions artifacts", async () => {
-    // Read the summarization module to verify instructions include artifact guidance
-    const { DEFAULT_SUMMARY_INSTRUCTIONS } = await import("./summarization.js") as any;
-    // The instructions are a module-level const, but not exported. Let's verify via
-    // the splitMessagesForSummary path that exercises the flow indirectly.
-    // Instead, let's verify the artifact detection in summary-fallback.
-  });
-
-  it("summary fallback includes artifact references section", async () => {
-    // Import the module to access the plain text fallback
+  // UC4: summary fallback extracts artifact references
+  it("UC4: summary fallback includes 'Saved Artifacts' section with all artifact refs", async () => {
     const mod = await import("./summary-fallback.js");
 
-    // Create messages with artifact references embedded in tool results
     const messages: AgentMessage[] = [
       makeAssistantMessage("Let me read the file"),
       {
@@ -181,33 +220,140 @@ describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => {
       } as any,
     ];
 
-    // Use summarizeWithFallback to exercise the full flow — but this requires
-    // an LLM model. Instead, we can test the behavior by causing all levels to fail.
-    // The summarizeWithFallback will fall through to Level 3 (plain text) if the model fails.
-    // Let's create a mock model that always throws.
+    // Force Level 3 fallback (plain text) by using a model that always throws
     const failingModel = {
-      complete: () => { throw new Error("Test: no LLM available"); },
+      complete: () => { throw new Error("Test: no LLM"); },
     };
 
-    try {
-      const result = await mod.summarizeWithFallback({
-        messages,
-        model: failingModel as any,
-        reserveTokens: 1024,
-        apiKey: "test-key",
-        instructions: "summarize",
-        availableTokens: 100_000,
-      });
+    const result = await mod.summarizeWithFallback({
+      messages,
+      model: failingModel as any,
+      reserveTokens: 1024,
+      apiKey: "test-key",
+      instructions: "summarize",
+      availableTokens: 100_000,
+    });
 
-      // Should fall through to Level 3 (plain-text fallback)
-      expect(result.level).toBe(3);
-      // The summary should contain artifact references
-      expect(result.summary).toContain("## Saved Artifacts");
-      expect(result.summary).toContain("artifacts/call_1.txt");
-      expect(result.summary).toContain("artifacts/call_2.txt");
-    } catch {
-      // If generateSummary isn't available as expected, at least verify
-      // the artifact extraction pattern works at the module level
-    }
+    // Must fall through to Level 3
+    expect(result.level).toBe(3);
+    // Summary must contain artifact references
+    expect(result.summary).toContain("## Saved Artifacts");
+    expect(result.summary).toContain("artifacts/call_1.txt");
+    expect(result.summary).toContain("artifacts/call_2.txt");
+  });
+});
+
+describe("Cross-Phase E2E: Phase 1 → Phase 2 Pipeline", () => {
+  // UC5: Phase 1 truncation output → Phase 2 pruning — artifact ref survives
+  it("UC5: artifact ref from Phase 1 truncation survives Phase 2 soft trim", () => {
+    // Phase 1: truncate an oversized tool result
+    const bigContent = "ORIGINAL_DATA_" + "Q".repeat(200_000);
+    let artifactPath = "";
+
+    const phase1Result = truncateOversizedToolResults({
+      message: {
+        role: "user",
+        content: [{ type: "tool_result", tool_use_id: "call_cross", content: bigContent }],
+        timestamp: Date.now(),
+      } as any,
+      contextWindowTokens: 50_000,
+      saveArtifact: (_id, _content) => {
+        artifactPath = `artifacts/call_cross.txt`;
+        return artifactPath;
+      },
+    });
+
+    // Phase 1 must have truncated
+    expect(phase1Result.truncated).toBe(true);
+    expect(phase1Result.artifacts.length).toBe(1);
+    expect(phase1Result.artifacts[0]!.toolCallId).toBe("call_cross");
+
+    // Extract the truncated text from Phase 1 output
+    const phase1Msg = phase1Result.message as any;
+    const phase1Text = extractContentText(phase1Msg.content[0].content);
+    expect(phase1Text).toContain("artifacts/call_cross.txt");
+
+    // Phase 2: feed Phase 1 output into pruneToolResults
+    const messages: AgentMessage[] = [
+      makeUserMessage("start"),
+      makeAssistantMessage("calling"),
+      phase1Result.message, // This is the Phase 1 truncated message
+      makeAssistantMessage("a1"),
+      makeToolResultMessage("s1"),
+      makeAssistantMessage("a2"),
+      makeToolResultMessage("s2"),
+      makeAssistantMessage("a3"),
+      makeToolResultMessage("s3"),
+    ];
+
+    const phase2Result = pruneToolResults({
+      messages,
+      contextWindowTokens: 3_000,
+      settings: {
+        softTrimRatio: 0.0, // Always trigger
+        hardClearRatio: 1.0, // No hard clear
+        minPrunableToolChars: 100,
+        keepLastAssistants: 3,
+        softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
+        hardClear: { enabled: false, placeholder: "" },
+      },
+    });
+
+    expect(phase2Result.changed).toBe(true);
+
+    // The artifact reference must survive the Phase 2 soft trim (index 2 due to prepended user msg)
+    const finalMsg = phase2Result.messages[2] as any;
+    const finalText = extractContentText(finalMsg.content[0]?.content ?? finalMsg.content[0]);
+    expect(finalText).toContain("artifacts/call_cross.txt");
+  });
+
+  // UC5b: Phase 1 → Phase 2 hard clear also preserves
+  it("UC5b: artifact ref from Phase 1 truncation survives Phase 2 hard clear", () => {
+    const bigContent = "HC_DATA_" + "W".repeat(200_000);
+
+    const phase1Result = truncateOversizedToolResults({
+      message: {
+        role: "user",
+        content: [{ type: "tool_result", tool_use_id: "call_hc", content: bigContent }],
+        timestamp: Date.now(),
+      } as any,
+      contextWindowTokens: 50_000,
+      saveArtifact: () => "artifacts/call_hc.txt",
+    });
+
+    expect(phase1Result.truncated).toBe(true);
+
+    const messages: AgentMessage[] = [
+      makeUserMessage("start"),
+      makeAssistantMessage("calling"),
+      phase1Result.message,
+      makeAssistantMessage("a1"),
+      makeToolResultMessage("s1"),
+      makeAssistantMessage("a2"),
+      makeToolResultMessage("s2"),
+      makeAssistantMessage("a3"),
+      makeToolResultMessage("s3"),
+    ];
+
+    const phase2Result = pruneToolResults({
+      messages,
+      contextWindowTokens: 1_000,
+      settings: {
+        softTrimRatio: 0.0,
+        hardClearRatio: 0.0, // Always hard clear
+        minPrunableToolChars: 100,
+        keepLastAssistants: 3,
+        softTrim: { maxChars: 50, headChars: 20, tailChars: 20 },
+        hardClear: { enabled: true, placeholder: "[Cleared]" },
+      },
+    });
+
+    expect(phase2Result.changed).toBe(true);
+    expect(phase2Result.hardCleared).toBeGreaterThan(0);
+
+    const finalMsg = phase2Result.messages[2] as any;
+    const finalText = extractContentText(finalMsg.content[0]?.content ?? finalMsg.content[0]);
+    expect(finalText).toContain("[Cleared]");
+    expect(finalText).toContain("artifacts/call_hc.txt");
   });
 });
diff --git a/packages/core/src/agent/session/artifact-integration.test.ts b/packages/core/src/agent/session/artifact-integration.test.ts
index 833ef9dd..566e576b 100644
--- a/packages/core/src/agent/session/artifact-integration.test.ts
+++ b/packages/core/src/agent/session/artifact-integration.test.ts
@@ -1,7 +1,21 @@
 /**
  * E2E Integration Test: Phase 1 — Artifact Storage + Pre-emptive Truncation
  *
- * Tests the full flow: SessionManager → truncateOversizedToolResults → artifact-store
+ * Tests the full flow: SessionManager.saveMessage() → truncateOversizedToolResults → artifact-store
+ *
+ * Test Matrix:
+ * ┌─────────────────────────────────────────┬──────────────────────┐
+ * │ Use Case                                │ Expected Outcome     │
+ * ├─────────────────────────────────────────┼──────────────────────┤
+ * │ UC1: Oversized tool result              │ Truncated + artifact │
+ * │ UC2: Small tool result                  │ Pass-through, no art │
+ * │ UC3: Head/tail preservation             │ Markers preserved    │
+ * │ UC4: Multiple results (mixed sizes)     │ Selective truncation │
+ * │ UC5: Feature toggle disabled            │ No truncation        │
+ * │ UC6: Session reload after truncation    │ Truncated content    │
+ * │ UC7: Truncation marker format           │ Correct format       │
+ * │ UC8: Artifact readable after reload     │ Full content intact  │
+ * └─────────────────────────────────────────┴──────────────────────┘
  */
 import { describe, it, expect, beforeEach, afterEach } from "vitest";
 import { mkdirSync, rmSync, existsSync } from "node:fs";
@@ -45,7 +59,8 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
     rmSync(testDir, { recursive: true, force: true });
   });
 
-  it("saves oversized tool result to artifact and truncates in session", async () => {
+  // UC1: Oversized tool result → truncated in session + artifact saved
+  it("UC1: oversized tool result is truncated and artifact is saved with full content", async () => {
     const sm = new SessionManager({
       sessionId,
       baseDir: testDir,
@@ -55,24 +70,15 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
       enableToolResultPruning: false,
     });
 
-    // Create an oversized tool result (> 30% of 100k * 4 chars = 120k chars)
     const bigContent = "X".repeat(200_000);
-    const userMessage = {
-      role: "user" as const,
-      content: [
-        {
-          type: "tool_result" as const,
-          tool_use_id: "call_abc123",
-          content: bigContent,
-        },
-      ],
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_abc123", content: bigContent }],
       timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
     await sm.flush();
 
-    // Verify: session file has truncated content
+    // Session file: truncated
     const entries = readEntries(sessionId, { baseDir: testDir });
     const msgEntries = entries.filter((e) => e.type === "message");
     expect(msgEntries.length).toBe(1);
@@ -83,12 +89,14 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
     expect(savedText).toContain("Tool result truncated");
     expect(savedText).toContain("artifacts/");
 
-    // Verify: artifact file exists with full content
+    // Artifact: full content preserved
     const artifactContent = readToolResultArtifact(sessionId, "call_abc123", { baseDir: testDir });
     expect(artifactContent).toBe(bigContent);
+    expect(artifactContent!.length).toBe(200_000);
   });
 
-  it("does NOT create artifact for small tool results", async () => {
+  // UC2: Small tool result → pass-through, no artifact
+  it("UC2: small tool result passes through without truncation or artifact", async () => {
     const sm = new SessionManager({
       sessionId,
       baseDir: testDir,
@@ -99,76 +107,60 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
     });
 
     const smallContent = "Small result data";
-    const userMessage = {
-      role: "user" as const,
-      content: [
-        {
-          type: "tool_result" as const,
-          tool_use_id: "call_small",
-          content: smallContent,
-        },
-      ],
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_small", content: smallContent }],
       timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
     await sm.flush();
 
-    // Verify: session file has full content (no truncation)
+    // Session file: unchanged content
     const entries = readEntries(sessionId, { baseDir: testDir });
     const saved = (entries.find((e) => e.type === "message") as any).message;
     const savedText = extractContentText(saved.content[0].content);
     expect(savedText).toBe(smallContent);
 
-    // Verify: no artifacts directory created
-    const artifactsDir = join(testDir, "sessions", sessionId, "artifacts");
+    // No artifacts directory
+    const artifactsDir = join(testDir, sessionId, "artifacts");
     expect(existsSync(artifactsDir)).toBe(false);
   });
 
-  it("truncated message preserves head and tail of original content", async () => {
+  // UC3: Head/tail preservation
+  it("UC3: truncated content preserves identifiable head and tail markers", async () => {
     const sm = new SessionManager({
       sessionId,
       baseDir: testDir,
       compactionMode: "tokens",
-      contextWindowTokens: 50_000, // smaller window → lower threshold
+      contextWindowTokens: 50_000,
       enableToolResultTruncation: true,
       enableToolResultPruning: false,
     });
 
-    // Create content with identifiable head and tail
-    const head = "HEAD_MARKER_" + "A".repeat(10_000);
+    const head = "HEAD_MARKER_START" + "A".repeat(10_000);
     const middle = "B".repeat(100_000);
-    const tail = "C".repeat(10_000) + "_TAIL_MARKER";
+    const tail = "C".repeat(10_000) + "TAIL_MARKER_END";
     const bigContent = head + middle + tail;
 
-    const userMessage = {
-      role: "user" as const,
-      content: [
-        {
-          type: "tool_result" as const,
-          tool_use_id: "call_headtail",
-          content: bigContent,
-        },
-      ],
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_ht", content: bigContent }],
       timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
     await sm.flush();
 
     const entries = readEntries(sessionId, { baseDir: testDir });
     const saved = (entries.find((e) => e.type === "message") as any).message;
     const savedText = extractContentText(saved.content[0].content);
 
-    // Head should be preserved
-    expect(savedText).toContain("HEAD_MARKER_");
-    // Tail should be preserved
-    expect(savedText).toContain("_TAIL_MARKER");
-    // Middle should be truncated
+    expect(savedText).toContain("HEAD_MARKER_START");
+    expect(savedText).toContain("TAIL_MARKER_END");
     expect(savedText.length).toBeLessThan(bigContent.length);
+    // Must also have the truncation marker
+    expect(savedText).toContain("Tool result truncated");
   });
 
-  it("handles multiple tool results in same message", async () => {
+  // UC4: Multiple tool results — selective truncation
+  it("UC4: message with mixed-size tool results truncates only oversized ones", async () => {
     const sm = new SessionManager({
       sessionId,
       baseDir: testDir,
@@ -178,69 +170,160 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
       enableToolResultPruning: false,
     });
 
-    const bigContent1 = "RESULT1_" + "X".repeat(200_000);
-    const smallContent = "small result";
-    const bigContent2 = "RESULT2_" + "Y".repeat(200_000);
+    const big1 = "BIG1_" + "X".repeat(200_000);
+    const small = "SMALL_RESULT_INTACT";
+    const big2 = "BIG2_" + "Y".repeat(200_000);
 
-    const userMessage = {
-      role: "user" as const,
+    sm.saveMessage({
+      role: "user",
       content: [
-        { type: "tool_result" as const, tool_use_id: "call_big1", content: bigContent1 },
-        { type: "tool_result" as const, tool_use_id: "call_small", content: smallContent },
-        { type: "tool_result" as const, tool_use_id: "call_big2", content: bigContent2 },
+        { type: "tool_result", tool_use_id: "call_big1", content: big1 },
+        { type: "tool_result", tool_use_id: "call_sm", content: small },
+        { type: "tool_result", tool_use_id: "call_big2", content: big2 },
       ],
       timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
     await sm.flush();
 
     const entries = readEntries(sessionId, { baseDir: testDir });
     const saved = (entries.find((e) => e.type === "message") as any).message;
 
-    // Big results should be truncated
-    const text0 = extractContentText(saved.content[0].content);
-    const text2 = extractContentText(saved.content[2].content);
-    expect(text0).toContain("Tool result truncated");
-    expect(text2).toContain("Tool result truncated");
+    // Big results: truncated
+    const t0 = extractContentText(saved.content[0].content);
+    const t2 = extractContentText(saved.content[2].content);
+    expect(t0).toContain("Tool result truncated");
+    expect(t2).toContain("Tool result truncated");
+    expect(t0.length).toBeLessThan(big1.length);
+    expect(t2.length).toBeLessThan(big2.length);
 
-    // Small result should be unchanged
-    const text1 = extractContentText(saved.content[1].content);
-    expect(text1).toBe(smallContent);
+    // Small result: intact
+    const t1 = extractContentText(saved.content[1].content);
+    expect(t1).toBe(small);
 
-    // Both artifacts should exist
+    // Both artifacts saved with full content
     const art1 = readToolResultArtifact(sessionId, "call_big1", { baseDir: testDir });
-    expect(art1).toContain("RESULT1_");
+    expect(art1).toBe(big1);
     const art2 = readToolResultArtifact(sessionId, "call_big2", { baseDir: testDir });
-    expect(art2).toContain("RESULT2_");
+    expect(art2).toBe(big2);
   });
 
-  it("respects enableToolResultTruncation=false", async () => {
+  // UC5: Feature disabled → no truncation
+  it("UC5: enableToolResultTruncation=false skips all truncation", async () => {
     const sm = new SessionManager({
       sessionId,
       baseDir: testDir,
       compactionMode: "tokens",
       contextWindowTokens: 50_000,
-      enableToolResultTruncation: false, // Disabled
+      enableToolResultTruncation: false,
       enableToolResultPruning: false,
     });
 
     const bigContent = "Z".repeat(200_000);
-    const userMessage = {
-      role: "user" as const,
-      content: [
-        { type: "tool_result" as const, tool_use_id: "call_noop", content: bigContent },
-      ],
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_noop", content: bigContent }],
       timestamp: Date.now(),
-    };
-
-    sm.saveMessage(userMessage);
+    } as any);
     await sm.flush();
 
     const entries = readEntries(sessionId, { baseDir: testDir });
     const saved = (entries.find((e) => e.type === "message") as any).message;
-    // Should NOT be truncated since feature is disabled
     const savedText = extractContentText(saved.content[0].content);
     expect(savedText).toBe(bigContent);
+    expect(savedText).not.toContain("Tool result truncated");
+  });
+
+  // UC6: Session reload after truncation
+  it("UC6: loadMessages() returns truncated content after save+reload", async () => {
+    const sm = new SessionManager({
+      sessionId,
+      baseDir: testDir,
+      compactionMode: "tokens",
+      contextWindowTokens: 100_000,
+      enableToolResultTruncation: true,
+      enableToolResultPruning: false,
+    });
+
+    const bigContent = "RELOAD_TEST_" + "R".repeat(200_000);
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_reload", content: bigContent }],
+      timestamp: Date.now(),
+    } as any);
+    await sm.flush();
+
+    // Create a fresh SessionManager to reload
+    const sm2 = new SessionManager({
+      sessionId,
+      baseDir: testDir,
+      compactionMode: "tokens",
+      contextWindowTokens: 100_000,
+    });
+    const messages = sm2.loadMessages();
+    expect(messages.length).toBe(1);
+
+    const loaded = messages[0] as any;
+    const loadedText = extractContentText(loaded.content[0].content);
+    // Loaded messages should show truncated content (not full)
+    expect(loadedText).toContain("Tool result truncated");
+    expect(loadedText).toContain("artifacts/");
+    expect(loadedText.length).toBeLessThan(bigContent.length);
+  });
+
+  // UC7: Truncation marker format
+  it("UC7: truncation marker contains original size and artifact path", async () => {
+    const sm = new SessionManager({
+      sessionId,
+      baseDir: testDir,
+      compactionMode: "tokens",
+      contextWindowTokens: 100_000,
+      enableToolResultTruncation: true,
+      enableToolResultPruning: false,
+    });
+
+    const bigContent = "M".repeat(200_000);
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_fmt", content: bigContent }],
+      timestamp: Date.now(),
+    } as any);
+    await sm.flush();
+
+    const entries = readEntries(sessionId, { baseDir: testDir });
+    const saved = (entries.find((e) => e.type === "message") as any).message;
+    const savedText = extractContentText(saved.content[0].content);
+
+    // Marker should include: original size, artifact path, and "read tool" hint
+    expect(savedText).toMatch(/original 200000 chars/);
+    expect(savedText).toMatch(/Full result saved to artifacts\/call_fmt\.txt/);
+    expect(savedText).toContain("read tool");
+  });
+
+  // UC8: Artifact readable via readToolResultArtifact after session operations
+  it("UC8: artifact is readable by toolCallId and contains exact original content", async () => {
+    const sm = new SessionManager({
+      sessionId,
+      baseDir: testDir,
+      compactionMode: "tokens",
+      contextWindowTokens: 100_000,
+      enableToolResultTruncation: true,
+      enableToolResultPruning: false,
+    });
+
+    // Use content with specific patterns to verify exact preservation
+    const specialContent = "START|" + "αβγδ".repeat(50_000) + "|END";
+    sm.saveMessage({
+      role: "user",
+      content: [{ type: "tool_result", tool_use_id: "call_exact", content: specialContent }],
+      timestamp: Date.now(),
+    } as any);
+    await sm.flush();
+
+    const artifact = readToolResultArtifact(sessionId, "call_exact", { baseDir: testDir });
+    expect(artifact).toBe(specialContent);
+
+    // Also verify the artifacts directory exists
+    const artifactsDir = join(testDir, sessionId, "artifacts");
+    expect(existsSync(artifactsDir)).toBe(true);
   });
 });