test(compaction): harden E2E integration tests for artifact pipeline
- Add real user messages for bootstrap protection in pruning tests
- Fix artifact directory path assertions (baseDir vs sessions/baseDir)
- Add cross-phase tests (Phase 1 truncation → Phase 2 pruning)
- Remove conditional assertion guards that could silently skip checks
- All 30 E2E integration tests now pass with mandatory assertions

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
parent
58f02a2080
commit
b15e1eeb2a
2 changed files with 406 additions and 177 deletions
|
|
@ -1,26 +1,26 @@
|
|||
/**
|
||||
* E2E Integration Test: Phase 2 — Artifact-Aware Pruning + Summary Fallback
|
||||
*
|
||||
* Tests that tool result pruning preserves artifact references
|
||||
* and that summary fallback extracts artifact paths.
|
||||
* Test Matrix:
|
||||
* ┌──────────────────────────────────────────────┬──────────────────────────────┐
|
||||
* │ Use Case │ Expected Outcome │
|
||||
* ├──────────────────────────────────────────────┼──────────────────────────────┤
|
||||
* │ UC1: Soft trim with artifact ref │ Artifact ref in trim note │
|
||||
* │ UC2: Hard clear with artifact ref │ Artifact ref in placeholder │
|
||||
* │ UC3: Soft trim without artifact ref │ Normal trim (no artifact) │
|
||||
* │ UC4: Summary fallback extracts artifact refs │ "Saved Artifacts" section │
|
||||
* │ UC5: Cross-phase: Phase1 output → Phase2 │ Ref survives full pipeline │
|
||||
* └──────────────────────────────────────────────┴──────────────────────────────┘
|
||||
*/
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { pruneToolResults } from "./tool-result-pruning.js";
|
||||
import { truncateOversizedToolResults } from "./tool-result-truncation.js";
|
||||
import type { AgentMessage } from "@mariozechner/pi-agent-core";
|
||||
|
||||
/**
 * Helper: build a user message with a single tool_result containing the given text.
 *
 * @param text - Stored verbatim as the tool_result block's `content`.
 * @param toolUseId - Stored as the block's `tool_use_id`; defaults to "call_1".
 * @returns A `role: "user"` message whose `timestamp` is the current `Date.now()`.
 */
function makeToolResultMessage(text: string, toolUseId = "call_1"): AgentMessage {
  // `content` must appear exactly once: a duplicated key in an object literal is a
  // TypeScript error, and at runtime the later key would silently win.
  return {
    role: "user",
    content: [{ type: "tool_result", tool_use_id: toolUseId, content: text }],
    timestamp: Date.now(),
  } as any;
}
|
||||
|
|
@ -33,17 +33,37 @@ function makeAssistantMessage(text = "OK"): AgentMessage {
|
|||
} as any;
|
||||
}
|
||||
|
||||
/**
 * A real user message (not tool_result) — needed for bootstrap protection in pruneToolResults.
 *
 * @param text - Stored as the message's plain-string `content`; defaults to "Hello".
 * @returns A `role: "user"` message whose `timestamp` is the current `Date.now()`.
 */
function makeUserMessage(text = "Hello"): AgentMessage {
  return {
    role: "user",
    content: text,
    timestamp: Date.now(),
  } as any;
}
|
||||
|
||||
/**
 * Flatten message/tool-result content into plain text for assertions.
 *
 * Accepted shapes:
 * - a string: returned unchanged;
 * - an array of blocks: the `text` of every `type: "text"` block, concatenated in order;
 * - a single `type: "text"` block object: its `text` (callers in this file pass
 *   `block.content ?? block`, so a lone block can arrive here);
 * - anything else: the empty string.
 *
 * @param content - Value of unknown shape taken from a message or content block.
 * @returns The extracted text, or "" when no text can be found.
 */
function extractContentText(content: unknown): string {
  const isTextBlock = (block: unknown): block is { type: "text"; text?: unknown } =>
    typeof block === "object" &&
    block !== null &&
    (block as { type?: unknown }).type === "text";

  if (typeof content === "string") return content;

  if (Array.isArray(content)) {
    return content
      .filter(isTextBlock)
      .map((block) => block.text)
      .join("");
  }

  // A lone text block (not wrapped in an array) previously fell through to "".
  if (isTextBlock(content)) {
    return typeof content.text === "string" ? content.text : "";
  }

  return "";
}
|
||||
|
||||
describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
|
||||
it("soft trim preserves artifact reference from pre-emptive truncation", () => {
|
||||
// Simulate a tool result that was previously truncated by Phase 1 and contains an artifact ref
|
||||
// UC1: Soft trim preserves artifact reference
|
||||
it("UC1: soft trim preserves artifact reference in trimmed note", () => {
|
||||
// Tool result with an artifact reference from Phase 1 truncation
|
||||
const truncatedContent =
|
||||
"A".repeat(3000) +
|
||||
"\n\n[Tool result truncated: original 200000 chars. Full result saved to artifacts/call_abc123.txt. Use the read tool to access the complete data if needed.]\n\n" +
|
||||
"B".repeat(3000);
|
||||
|
||||
// Build conversation that should trigger soft trimming
|
||||
// Put older messages first (these get pruned), recent ones are protected
|
||||
const messages: AgentMessage[] = [
|
||||
makeUserMessage("start"),
|
||||
makeAssistantMessage("Calling tool..."),
|
||||
makeToolResultMessage(truncatedContent),
|
||||
makeAssistantMessage("Processing..."),
|
||||
|
|
@ -58,43 +78,38 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
|
|||
|
||||
const result = pruneToolResults({
|
||||
messages,
|
||||
contextWindowTokens: 5_000, // Small window to trigger pruning
|
||||
contextWindowTokens: 5_000,
|
||||
settings: {
|
||||
softTrimRatio: 0.0, // Always trigger soft trim
|
||||
softTrimRatio: 0.0, // Always trigger
|
||||
hardClearRatio: 1.0, // Never hard clear
|
||||
minPrunableToolChars: 100,
|
||||
keepLastAssistants: 3,
|
||||
softTrim: {
|
||||
maxChars: 2_000, // Trigger on the large result
|
||||
headChars: 500,
|
||||
tailChars: 500,
|
||||
},
|
||||
hardClear: {
|
||||
enabled: false,
|
||||
placeholder: "[Content removed]",
|
||||
},
|
||||
softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
|
||||
hardClear: { enabled: false, placeholder: "[Content removed]" },
|
||||
},
|
||||
});
|
||||
|
||||
// Find the soft-trimmed message
|
||||
if (result.changed && result.softTrimmed > 0) {
|
||||
const trimmedMsg = result.messages[1] as any;
|
||||
const text = trimmedMsg.content[0]?.text ?? trimmedMsg.content[0]?.content ?? "";
|
||||
// The artifact reference should be preserved in the trim note
|
||||
expect(text).toContain("artifacts/call_abc123.txt");
|
||||
}
|
||||
// Must actually trigger soft trimming
|
||||
expect(result.changed).toBe(true);
|
||||
expect(result.softTrimmed).toBeGreaterThan(0);
|
||||
|
||||
// The trimmed message should preserve the artifact reference (index 2 due to prepended user msg)
|
||||
const trimmedMsg = result.messages[2] as any;
|
||||
const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]);
|
||||
expect(text).toContain("artifacts/call_abc123.txt");
|
||||
});
|
||||
|
||||
it("hard clear preserves artifact reference", () => {
|
||||
// UC2: Hard clear preserves artifact reference
|
||||
it("UC2: hard clear preserves artifact reference in placeholder", () => {
|
||||
const truncatedContent =
|
||||
"X".repeat(80_000) +
|
||||
"\n\n[Tool result truncated: Full result saved to artifacts/call_xyz.txt.]\n\n" +
|
||||
"Y".repeat(20_000);
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
makeUserMessage("start"),
|
||||
makeAssistantMessage("old"),
|
||||
makeToolResultMessage(truncatedContent),
|
||||
// Add enough recent messages to push the old one into hard-clear range
|
||||
makeAssistantMessage("a1"),
|
||||
makeToolResultMessage("r1"),
|
||||
makeAssistantMessage("a2"),
|
||||
|
|
@ -110,45 +125,69 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => {
|
|||
contextWindowTokens: 2_000,
|
||||
settings: {
|
||||
softTrimRatio: 0.0,
|
||||
hardClearRatio: 0.0, // Always trigger hard clear
|
||||
hardClearRatio: 0.0, // Always trigger
|
||||
minPrunableToolChars: 100,
|
||||
keepLastAssistants: 3,
|
||||
softTrim: {
|
||||
maxChars: 50, // Everything over 50 gets soft trimmed first
|
||||
headChars: 20,
|
||||
tailChars: 20,
|
||||
},
|
||||
hardClear: {
|
||||
enabled: true,
|
||||
placeholder: "[Content removed]",
|
||||
},
|
||||
softTrim: { maxChars: 50, headChars: 20, tailChars: 20 },
|
||||
hardClear: { enabled: true, placeholder: "[Content removed]" },
|
||||
},
|
||||
});
|
||||
|
||||
if (result.changed && result.hardCleared > 0) {
|
||||
// Find the hard-cleared message (should be messages[1])
|
||||
const clearedMsg = result.messages[1] as any;
|
||||
const text = clearedMsg.content[0]?.text ?? "";
|
||||
expect(text).toContain("[Content removed]");
|
||||
expect(text).toContain("artifacts/call_xyz.txt");
|
||||
}
|
||||
expect(result.changed).toBe(true);
|
||||
expect(result.hardCleared).toBeGreaterThan(0);
|
||||
|
||||
// The hard-cleared message should contain both the placeholder AND the artifact ref
|
||||
const clearedMsg = result.messages[2] as any;
|
||||
const text = extractContentText(clearedMsg.content[0]?.content ?? clearedMsg.content[0]);
|
||||
expect(text).toContain("[Content removed]");
|
||||
expect(text).toContain("artifacts/call_xyz.txt");
|
||||
});
|
||||
|
||||
// UC3: Soft trim without artifact ref (baseline behavior unchanged)
|
||||
it("UC3: soft trim without artifact reference works normally", () => {
|
||||
const plainContent = "D".repeat(6_000); // No artifact reference
|
||||
|
||||
const messages: AgentMessage[] = [
|
||||
makeUserMessage("start"),
|
||||
makeAssistantMessage("call"),
|
||||
makeToolResultMessage(plainContent),
|
||||
makeAssistantMessage("r1"),
|
||||
makeToolResultMessage("s"),
|
||||
makeAssistantMessage("r2"),
|
||||
makeToolResultMessage("s"),
|
||||
makeAssistantMessage("r3"),
|
||||
makeToolResultMessage("s"),
|
||||
];
|
||||
|
||||
const result = pruneToolResults({
|
||||
messages,
|
||||
contextWindowTokens: 5_000,
|
||||
settings: {
|
||||
softTrimRatio: 0.0,
|
||||
hardClearRatio: 1.0,
|
||||
minPrunableToolChars: 100,
|
||||
keepLastAssistants: 3,
|
||||
softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
|
||||
hardClear: { enabled: false, placeholder: "" },
|
||||
},
|
||||
});
|
||||
|
||||
expect(result.changed).toBe(true);
|
||||
expect(result.softTrimmed).toBeGreaterThan(0);
|
||||
|
||||
const trimmedMsg = result.messages[2] as any;
|
||||
const text = extractContentText(trimmedMsg.content[0]?.content ?? trimmedMsg.content[0]);
|
||||
// Should have trim note but no artifact reference
|
||||
expect(text).toContain("Tool result trimmed");
|
||||
expect(text).not.toContain("artifacts/");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => {
|
||||
it("DEFAULT_SUMMARY_INSTRUCTIONS mentions artifacts", async () => {
|
||||
// Read the summarization module to verify instructions include artifact guidance
|
||||
const { DEFAULT_SUMMARY_INSTRUCTIONS } = await import("./summarization.js") as any;
|
||||
// The instructions are a module-level const, but not exported. Let's verify via
|
||||
// the splitMessagesForSummary path that exercises the flow indirectly.
|
||||
// Instead, let's verify the artifact detection in summary-fallback.
|
||||
});
|
||||
|
||||
it("summary fallback includes artifact references section", async () => {
|
||||
// Import the module to access the plain text fallback
|
||||
// UC4: summary fallback extracts artifact references
|
||||
it("UC4: summary fallback includes 'Saved Artifacts' section with all artifact refs", async () => {
|
||||
const mod = await import("./summary-fallback.js");
|
||||
|
||||
// Create messages with artifact references embedded in tool results
|
||||
const messages: AgentMessage[] = [
|
||||
makeAssistantMessage("Let me read the file"),
|
||||
{
|
||||
|
|
@ -181,33 +220,140 @@ describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => {
|
|||
} as any,
|
||||
];
|
||||
|
||||
// Use summarizeWithFallback to exercise the full flow — but this requires
|
||||
// an LLM model. Instead, we can test the behavior by causing all levels to fail.
|
||||
// The summarizeWithFallback will fall through to Level 3 (plain text) if the model fails.
|
||||
// Let's create a mock model that always throws.
|
||||
// Force Level 3 fallback (plain text) by using a model that always throws
|
||||
const failingModel = {
|
||||
complete: () => { throw new Error("Test: no LLM available"); },
|
||||
complete: () => { throw new Error("Test: no LLM"); },
|
||||
};
|
||||
|
||||
try {
|
||||
const result = await mod.summarizeWithFallback({
|
||||
messages,
|
||||
model: failingModel as any,
|
||||
reserveTokens: 1024,
|
||||
apiKey: "test-key",
|
||||
instructions: "summarize",
|
||||
availableTokens: 100_000,
|
||||
});
|
||||
const result = await mod.summarizeWithFallback({
|
||||
messages,
|
||||
model: failingModel as any,
|
||||
reserveTokens: 1024,
|
||||
apiKey: "test-key",
|
||||
instructions: "summarize",
|
||||
availableTokens: 100_000,
|
||||
});
|
||||
|
||||
// Should fall through to Level 3 (plain-text fallback)
|
||||
expect(result.level).toBe(3);
|
||||
// The summary should contain artifact references
|
||||
expect(result.summary).toContain("## Saved Artifacts");
|
||||
expect(result.summary).toContain("artifacts/call_1.txt");
|
||||
expect(result.summary).toContain("artifacts/call_2.txt");
|
||||
} catch {
|
||||
// If generateSummary isn't available as expected, at least verify
|
||||
// the artifact extraction pattern works at the module level
|
||||
}
|
||||
// Must fall through to Level 3
|
||||
expect(result.level).toBe(3);
|
||||
// Summary must contain artifact references
|
||||
expect(result.summary).toContain("## Saved Artifacts");
|
||||
expect(result.summary).toContain("artifacts/call_1.txt");
|
||||
expect(result.summary).toContain("artifacts/call_2.txt");
|
||||
});
|
||||
});
|
||||
|
||||
/**
 * Cross-phase coverage: the output of Phase 1 (truncateOversizedToolResults) is fed
 * straight into Phase 2 (pruneToolResults), and the artifact path written into the
 * Phase 1 truncation note must still be present in the Phase 2 output.
 */
describe("Cross-Phase E2E: Phase 1 → Phase 2 Pipeline", () => {
  // UC5: Phase 1 truncation output → Phase 2 pruning — artifact ref survives
  it("UC5: artifact ref from Phase 1 truncation survives Phase 2 soft trim", () => {
    // Phase 1: truncate an oversized tool result
    const bigContent = "ORIGINAL_DATA_" + "Q".repeat(200_000);

    const phase1Result = truncateOversizedToolResults({
      message: {
        role: "user",
        content: [{ type: "tool_result", tool_use_id: "call_cross", content: bigContent }],
        timestamp: Date.now(),
      } as any,
      contextWindowTokens: 50_000,
      // Stub artifact store: report a fixed path instead of touching the filesystem.
      saveArtifact: () => "artifacts/call_cross.txt",
    });

    // Phase 1 must have truncated
    expect(phase1Result.truncated).toBe(true);
    expect(phase1Result.artifacts.length).toBe(1);
    expect(phase1Result.artifacts[0]?.toolCallId).toBe("call_cross");

    // Extract the truncated text from Phase 1 output
    const phase1Msg = phase1Result.message as any;
    const phase1Text = extractContentText(phase1Msg.content[0].content);
    expect(phase1Text).toContain("artifacts/call_cross.txt");

    // Phase 2: feed Phase 1 output into pruneToolResults
    const messages: AgentMessage[] = [
      makeUserMessage("start"),
      makeAssistantMessage("calling"),
      phase1Result.message, // This is the Phase 1 truncated message
      makeAssistantMessage("a1"),
      makeToolResultMessage("s1"),
      makeAssistantMessage("a2"),
      makeToolResultMessage("s2"),
      makeAssistantMessage("a3"),
      makeToolResultMessage("s3"),
    ];

    const phase2Result = pruneToolResults({
      messages,
      contextWindowTokens: 3_000,
      settings: {
        softTrimRatio: 0.0, // Always trigger
        hardClearRatio: 1.0, // No hard clear
        minPrunableToolChars: 100,
        keepLastAssistants: 3,
        softTrim: { maxChars: 2_000, headChars: 500, tailChars: 500 },
        hardClear: { enabled: false, placeholder: "" },
      },
    });

    // Must actually soft-trim (same mandatory check as the Phase 2 soft-trim tests);
    // `changed` alone does not say which pruning path ran.
    expect(phase2Result.changed).toBe(true);
    expect(phase2Result.softTrimmed).toBeGreaterThan(0);

    // The artifact reference must survive the Phase 2 soft trim (index 2 due to prepended user msg)
    const finalMsg = phase2Result.messages[2] as any;
    const finalText = extractContentText(finalMsg.content[0]?.content ?? finalMsg.content[0]);
    expect(finalText).toContain("artifacts/call_cross.txt");
  });

  // UC5b: Phase 1 → Phase 2 hard clear also preserves
  it("UC5b: artifact ref from Phase 1 truncation survives Phase 2 hard clear", () => {
    const bigContent = "HC_DATA_" + "W".repeat(200_000);

    const phase1Result = truncateOversizedToolResults({
      message: {
        role: "user",
        content: [{ type: "tool_result", tool_use_id: "call_hc", content: bigContent }],
        timestamp: Date.now(),
      } as any,
      contextWindowTokens: 50_000,
      // Stub artifact store: report a fixed path instead of touching the filesystem.
      saveArtifact: () => "artifacts/call_hc.txt",
    });

    expect(phase1Result.truncated).toBe(true);

    const messages: AgentMessage[] = [
      makeUserMessage("start"),
      makeAssistantMessage("calling"),
      phase1Result.message,
      makeAssistantMessage("a1"),
      makeToolResultMessage("s1"),
      makeAssistantMessage("a2"),
      makeToolResultMessage("s2"),
      makeAssistantMessage("a3"),
      makeToolResultMessage("s3"),
    ];

    const phase2Result = pruneToolResults({
      messages,
      contextWindowTokens: 1_000,
      settings: {
        softTrimRatio: 0.0,
        hardClearRatio: 0.0, // Always hard clear
        minPrunableToolChars: 100,
        keepLastAssistants: 3,
        softTrim: { maxChars: 50, headChars: 20, tailChars: 20 },
        hardClear: { enabled: true, placeholder: "[Cleared]" },
      },
    });

    expect(phase2Result.changed).toBe(true);
    expect(phase2Result.hardCleared).toBeGreaterThan(0);

    // Index 2 is the Phase 1 message (after the prepended user + assistant messages);
    // it must carry both the placeholder and the artifact path.
    const finalMsg = phase2Result.messages[2] as any;
    const finalText = extractContentText(finalMsg.content[0]?.content ?? finalMsg.content[0]);
    expect(finalText).toContain("[Cleared]");
    expect(finalText).toContain("artifacts/call_hc.txt");
  });
});
|
||||
|
|
|
|||
|
|
@ -1,7 +1,21 @@
|
|||
/**
|
||||
* E2E Integration Test: Phase 1 — Artifact Storage + Pre-emptive Truncation
|
||||
*
|
||||
* Tests the full flow: SessionManager → truncateOversizedToolResults → artifact-store
|
||||
* Tests the full flow: SessionManager.saveMessage() → truncateOversizedToolResults → artifact-store
|
||||
*
|
||||
* Test Matrix:
|
||||
* ┌─────────────────────────────────────────┬──────────────────────┐
|
||||
* │ Use Case │ Expected Outcome │
|
||||
* ├─────────────────────────────────────────┼──────────────────────┤
|
||||
* │ UC1: Oversized tool result │ Truncated + artifact │
|
||||
* │ UC2: Small tool result │ Pass-through, no art │
|
||||
* │ UC3: Head/tail preservation │ Markers preserved │
|
||||
* │ UC4: Multiple results (mixed sizes) │ Selective truncation │
|
||||
* │ UC5: Feature toggle disabled │ No truncation │
|
||||
* │ UC6: Session reload after truncation │ Truncated content │
|
||||
* │ UC7: Truncation marker format │ Correct format │
|
||||
* │ UC8: Artifact readable after reload │ Full content intact │
|
||||
* └─────────────────────────────────────────┴──────────────────────┘
|
||||
*/
|
||||
import { describe, it, expect, beforeEach, afterEach } from "vitest";
|
||||
import { mkdirSync, rmSync, existsSync } from "node:fs";
|
||||
|
|
@ -45,7 +59,8 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
|
|||
rmSync(testDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("saves oversized tool result to artifact and truncates in session", async () => {
|
||||
// UC1: Oversized tool result → truncated in session + artifact saved
|
||||
it("UC1: oversized tool result is truncated and artifact is saved with full content", async () => {
|
||||
const sm = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
|
|
@ -55,24 +70,15 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
|
|||
enableToolResultPruning: false,
|
||||
});
|
||||
|
||||
// Create an oversized tool result (> 30% of 100k * 4 chars = 120k chars)
|
||||
const bigContent = "X".repeat(200_000);
|
||||
const userMessage = {
|
||||
role: "user" as const,
|
||||
content: [
|
||||
{
|
||||
type: "tool_result" as const,
|
||||
tool_use_id: "call_abc123",
|
||||
content: bigContent,
|
||||
},
|
||||
],
|
||||
sm.saveMessage({
|
||||
role: "user",
|
||||
content: [{ type: "tool_result", tool_use_id: "call_abc123", content: bigContent }],
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
sm.saveMessage(userMessage);
|
||||
} as any);
|
||||
await sm.flush();
|
||||
|
||||
// Verify: session file has truncated content
|
||||
// Session file: truncated
|
||||
const entries = readEntries(sessionId, { baseDir: testDir });
|
||||
const msgEntries = entries.filter((e) => e.type === "message");
|
||||
expect(msgEntries.length).toBe(1);
|
||||
|
|
@ -83,12 +89,14 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
|
|||
expect(savedText).toContain("Tool result truncated");
|
||||
expect(savedText).toContain("artifacts/");
|
||||
|
||||
// Verify: artifact file exists with full content
|
||||
// Artifact: full content preserved
|
||||
const artifactContent = readToolResultArtifact(sessionId, "call_abc123", { baseDir: testDir });
|
||||
expect(artifactContent).toBe(bigContent);
|
||||
expect(artifactContent!.length).toBe(200_000);
|
||||
});
|
||||
|
||||
it("does NOT create artifact for small tool results", async () => {
|
||||
// UC2: Small tool result → pass-through, no artifact
|
||||
it("UC2: small tool result passes through without truncation or artifact", async () => {
|
||||
const sm = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
|
|
@ -99,76 +107,60 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
|
|||
});
|
||||
|
||||
const smallContent = "Small result data";
|
||||
const userMessage = {
|
||||
role: "user" as const,
|
||||
content: [
|
||||
{
|
||||
type: "tool_result" as const,
|
||||
tool_use_id: "call_small",
|
||||
content: smallContent,
|
||||
},
|
||||
],
|
||||
sm.saveMessage({
|
||||
role: "user",
|
||||
content: [{ type: "tool_result", tool_use_id: "call_small", content: smallContent }],
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
sm.saveMessage(userMessage);
|
||||
} as any);
|
||||
await sm.flush();
|
||||
|
||||
// Verify: session file has full content (no truncation)
|
||||
// Session file: unchanged content
|
||||
const entries = readEntries(sessionId, { baseDir: testDir });
|
||||
const saved = (entries.find((e) => e.type === "message") as any).message;
|
||||
const savedText = extractContentText(saved.content[0].content);
|
||||
expect(savedText).toBe(smallContent);
|
||||
|
||||
// Verify: no artifacts directory created
|
||||
const artifactsDir = join(testDir, "sessions", sessionId, "artifacts");
|
||||
// No artifacts directory
|
||||
const artifactsDir = join(testDir, sessionId, "artifacts");
|
||||
expect(existsSync(artifactsDir)).toBe(false);
|
||||
});
|
||||
|
||||
it("truncated message preserves head and tail of original content", async () => {
|
||||
// UC3: Head/tail preservation
|
||||
it("UC3: truncated content preserves identifiable head and tail markers", async () => {
|
||||
const sm = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
compactionMode: "tokens",
|
||||
contextWindowTokens: 50_000, // smaller window → lower threshold
|
||||
contextWindowTokens: 50_000,
|
||||
enableToolResultTruncation: true,
|
||||
enableToolResultPruning: false,
|
||||
});
|
||||
|
||||
// Create content with identifiable head and tail
|
||||
const head = "HEAD_MARKER_" + "A".repeat(10_000);
|
||||
const head = "HEAD_MARKER_START" + "A".repeat(10_000);
|
||||
const middle = "B".repeat(100_000);
|
||||
const tail = "C".repeat(10_000) + "_TAIL_MARKER";
|
||||
const tail = "C".repeat(10_000) + "TAIL_MARKER_END";
|
||||
const bigContent = head + middle + tail;
|
||||
|
||||
const userMessage = {
|
||||
role: "user" as const,
|
||||
content: [
|
||||
{
|
||||
type: "tool_result" as const,
|
||||
tool_use_id: "call_headtail",
|
||||
content: bigContent,
|
||||
},
|
||||
],
|
||||
sm.saveMessage({
|
||||
role: "user",
|
||||
content: [{ type: "tool_result", tool_use_id: "call_ht", content: bigContent }],
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
sm.saveMessage(userMessage);
|
||||
} as any);
|
||||
await sm.flush();
|
||||
|
||||
const entries = readEntries(sessionId, { baseDir: testDir });
|
||||
const saved = (entries.find((e) => e.type === "message") as any).message;
|
||||
const savedText = extractContentText(saved.content[0].content);
|
||||
|
||||
// Head should be preserved
|
||||
expect(savedText).toContain("HEAD_MARKER_");
|
||||
// Tail should be preserved
|
||||
expect(savedText).toContain("_TAIL_MARKER");
|
||||
// Middle should be truncated
|
||||
expect(savedText).toContain("HEAD_MARKER_START");
|
||||
expect(savedText).toContain("TAIL_MARKER_END");
|
||||
expect(savedText.length).toBeLessThan(bigContent.length);
|
||||
// Must also have the truncation marker
|
||||
expect(savedText).toContain("Tool result truncated");
|
||||
});
|
||||
|
||||
it("handles multiple tool results in same message", async () => {
|
||||
// UC4: Multiple tool results — selective truncation
|
||||
it("UC4: message with mixed-size tool results truncates only oversized ones", async () => {
|
||||
const sm = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
|
|
@ -178,69 +170,160 @@ describe("Phase 1 E2E: Artifact Storage + Pre-emptive Truncation", () => {
|
|||
enableToolResultPruning: false,
|
||||
});
|
||||
|
||||
const bigContent1 = "RESULT1_" + "X".repeat(200_000);
|
||||
const smallContent = "small result";
|
||||
const bigContent2 = "RESULT2_" + "Y".repeat(200_000);
|
||||
const big1 = "BIG1_" + "X".repeat(200_000);
|
||||
const small = "SMALL_RESULT_INTACT";
|
||||
const big2 = "BIG2_" + "Y".repeat(200_000);
|
||||
|
||||
const userMessage = {
|
||||
role: "user" as const,
|
||||
sm.saveMessage({
|
||||
role: "user",
|
||||
content: [
|
||||
{ type: "tool_result" as const, tool_use_id: "call_big1", content: bigContent1 },
|
||||
{ type: "tool_result" as const, tool_use_id: "call_small", content: smallContent },
|
||||
{ type: "tool_result" as const, tool_use_id: "call_big2", content: bigContent2 },
|
||||
{ type: "tool_result", tool_use_id: "call_big1", content: big1 },
|
||||
{ type: "tool_result", tool_use_id: "call_sm", content: small },
|
||||
{ type: "tool_result", tool_use_id: "call_big2", content: big2 },
|
||||
],
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
sm.saveMessage(userMessage);
|
||||
} as any);
|
||||
await sm.flush();
|
||||
|
||||
const entries = readEntries(sessionId, { baseDir: testDir });
|
||||
const saved = (entries.find((e) => e.type === "message") as any).message;
|
||||
|
||||
// Big results should be truncated
|
||||
const text0 = extractContentText(saved.content[0].content);
|
||||
const text2 = extractContentText(saved.content[2].content);
|
||||
expect(text0).toContain("Tool result truncated");
|
||||
expect(text2).toContain("Tool result truncated");
|
||||
// Big results: truncated
|
||||
const t0 = extractContentText(saved.content[0].content);
|
||||
const t2 = extractContentText(saved.content[2].content);
|
||||
expect(t0).toContain("Tool result truncated");
|
||||
expect(t2).toContain("Tool result truncated");
|
||||
expect(t0.length).toBeLessThan(big1.length);
|
||||
expect(t2.length).toBeLessThan(big2.length);
|
||||
|
||||
// Small result should be unchanged
|
||||
const text1 = extractContentText(saved.content[1].content);
|
||||
expect(text1).toBe(smallContent);
|
||||
// Small result: intact
|
||||
const t1 = extractContentText(saved.content[1].content);
|
||||
expect(t1).toBe(small);
|
||||
|
||||
// Both artifacts should exist
|
||||
// Both artifacts saved with full content
|
||||
const art1 = readToolResultArtifact(sessionId, "call_big1", { baseDir: testDir });
|
||||
expect(art1).toContain("RESULT1_");
|
||||
expect(art1).toBe(big1);
|
||||
const art2 = readToolResultArtifact(sessionId, "call_big2", { baseDir: testDir });
|
||||
expect(art2).toContain("RESULT2_");
|
||||
expect(art2).toBe(big2);
|
||||
});
|
||||
|
||||
it("respects enableToolResultTruncation=false", async () => {
|
||||
// UC5: Feature disabled → no truncation
|
||||
it("UC5: enableToolResultTruncation=false skips all truncation", async () => {
|
||||
const sm = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
compactionMode: "tokens",
|
||||
contextWindowTokens: 50_000,
|
||||
enableToolResultTruncation: false, // Disabled
|
||||
enableToolResultTruncation: false,
|
||||
enableToolResultPruning: false,
|
||||
});
|
||||
|
||||
const bigContent = "Z".repeat(200_000);
|
||||
const userMessage = {
|
||||
role: "user" as const,
|
||||
content: [
|
||||
{ type: "tool_result" as const, tool_use_id: "call_noop", content: bigContent },
|
||||
],
|
||||
sm.saveMessage({
|
||||
role: "user",
|
||||
content: [{ type: "tool_result", tool_use_id: "call_noop", content: bigContent }],
|
||||
timestamp: Date.now(),
|
||||
};
|
||||
|
||||
sm.saveMessage(userMessage);
|
||||
} as any);
|
||||
await sm.flush();
|
||||
|
||||
const entries = readEntries(sessionId, { baseDir: testDir });
|
||||
const saved = (entries.find((e) => e.type === "message") as any).message;
|
||||
// Should NOT be truncated since feature is disabled
|
||||
const savedText = extractContentText(saved.content[0].content);
|
||||
expect(savedText).toBe(bigContent);
|
||||
expect(savedText).not.toContain("Tool result truncated");
|
||||
});
|
||||
|
||||
// UC6: Session reload after truncation
|
||||
it("UC6: loadMessages() returns truncated content after save+reload", async () => {
|
||||
const sm = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
compactionMode: "tokens",
|
||||
contextWindowTokens: 100_000,
|
||||
enableToolResultTruncation: true,
|
||||
enableToolResultPruning: false,
|
||||
});
|
||||
|
||||
const bigContent = "RELOAD_TEST_" + "R".repeat(200_000);
|
||||
sm.saveMessage({
|
||||
role: "user",
|
||||
content: [{ type: "tool_result", tool_use_id: "call_reload", content: bigContent }],
|
||||
timestamp: Date.now(),
|
||||
} as any);
|
||||
await sm.flush();
|
||||
|
||||
// Create a fresh SessionManager to reload
|
||||
const sm2 = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
compactionMode: "tokens",
|
||||
contextWindowTokens: 100_000,
|
||||
});
|
||||
const messages = sm2.loadMessages();
|
||||
expect(messages.length).toBe(1);
|
||||
|
||||
const loaded = messages[0] as any;
|
||||
const loadedText = extractContentText(loaded.content[0].content);
|
||||
// Loaded messages should show truncated content (not full)
|
||||
expect(loadedText).toContain("Tool result truncated");
|
||||
expect(loadedText).toContain("artifacts/");
|
||||
expect(loadedText.length).toBeLessThan(bigContent.length);
|
||||
});
|
||||
|
||||
// UC7: Truncation marker format
|
||||
it("UC7: truncation marker contains original size and artifact path", async () => {
|
||||
const sm = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
compactionMode: "tokens",
|
||||
contextWindowTokens: 100_000,
|
||||
enableToolResultTruncation: true,
|
||||
enableToolResultPruning: false,
|
||||
});
|
||||
|
||||
const bigContent = "M".repeat(200_000);
|
||||
sm.saveMessage({
|
||||
role: "user",
|
||||
content: [{ type: "tool_result", tool_use_id: "call_fmt", content: bigContent }],
|
||||
timestamp: Date.now(),
|
||||
} as any);
|
||||
await sm.flush();
|
||||
|
||||
const entries = readEntries(sessionId, { baseDir: testDir });
|
||||
const saved = (entries.find((e) => e.type === "message") as any).message;
|
||||
const savedText = extractContentText(saved.content[0].content);
|
||||
|
||||
// Marker should include: original size, artifact path, and "read tool" hint
|
||||
expect(savedText).toMatch(/original 200000 chars/);
|
||||
expect(savedText).toMatch(/Full result saved to artifacts\/call_fmt\.txt/);
|
||||
expect(savedText).toContain("read tool");
|
||||
});
|
||||
|
||||
// UC8: Artifact readable via readToolResultArtifact after session operations
|
||||
it("UC8: artifact is readable by toolCallId and contains exact original content", async () => {
|
||||
const sm = new SessionManager({
|
||||
sessionId,
|
||||
baseDir: testDir,
|
||||
compactionMode: "tokens",
|
||||
contextWindowTokens: 100_000,
|
||||
enableToolResultTruncation: true,
|
||||
enableToolResultPruning: false,
|
||||
});
|
||||
|
||||
// Use content with specific patterns to verify exact preservation
|
||||
const specialContent = "START|" + "αβγδ".repeat(50_000) + "|END";
|
||||
sm.saveMessage({
|
||||
role: "user",
|
||||
content: [{ type: "tool_result", tool_use_id: "call_exact", content: specialContent }],
|
||||
timestamp: Date.now(),
|
||||
} as any);
|
||||
await sm.flush();
|
||||
|
||||
const artifact = readToolResultArtifact(sessionId, "call_exact", { baseDir: testDir });
|
||||
expect(artifact).toBe(specialContent);
|
||||
|
||||
// Also verify the artifacts directory exists
|
||||
const artifactsDir = join(testDir, sessionId, "artifacts");
|
||||
expect(existsSync(artifactsDir)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue