diff --git a/packages/core/src/agent/context-window/index.ts b/packages/core/src/agent/context-window/index.ts index 8d04b64f..62ebdf4b 100644 --- a/packages/core/src/agent/context-window/index.ts +++ b/packages/core/src/agent/context-window/index.ts @@ -43,7 +43,6 @@ export { splitMessagesForSummary, detectSplitTurn, computeAdaptiveChunkRatio, - compactMessagesWithSummary, compactMessagesWithChunkedSummary, } from "./summarization.js"; diff --git a/packages/core/src/agent/context-window/summarization.ts b/packages/core/src/agent/context-window/summarization.ts index 1ff99c11..859b8e11 100644 --- a/packages/core/src/agent/context-window/summarization.ts +++ b/packages/core/src/agent/context-window/summarization.ts @@ -238,116 +238,6 @@ function createSummaryMessage(summary: string, previousSummary?: string): AgentM }; } -/** - * Execute summary-based compaction - * - * Uses LLM to generate summary of historical messages, then combines summary with recent messages - */ -export async function compactMessagesWithSummary( - params: SummaryCompactionParams, -): Promise { - const { - messages, - model, - apiKey, - availableTokens, - targetRatio, - minKeepMessages, - reserveTokens = 2048, - customInstructions, - previousSummary, - signal, - } = params; - - // Split messages - const split = splitMessagesForSummary(messages, availableTokens, { - targetRatio, - minKeepMessages, - }); - - if (!split) { - return null; - } - - let { toSummarize, toKeep } = split; - - // Detect and handle split turn - const splitTurn = detectSplitTurn(toSummarize, toKeep); - let splitPrefixSummary = ""; - - if (splitTurn) { - toSummarize = splitTurn.adjustedToSummarize; - toKeep = splitTurn.adjustedToKeep; - - // Summarize the split prefix separately - const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; - const prefixResult = await summarizeWithFallback({ - messages: splitTurn.splitPrefix, - model, - reserveTokens, - apiKey, - signal, - instructions, - previousSummary, - availableTokens, - }); - splitPrefixSummary = prefixResult.summary; - } - - // Generate summary with fallback (toSummarize no longer contains split prefix messages) - const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS; - let finalSummary = ""; - - if (toSummarize.length > 0) { - const { summary } = await summarizeWithFallback({ - messages: toSummarize, - model, - reserveTokens, - apiKey, - signal, - instructions, - previousSummary, - availableTokens, - }); - finalSummary = summary; - } - - // Append split prefix summary if present - if (splitPrefixSummary) { - finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`; - } - - // Append metadata sections (all compacted = adjusted toSummarize + splitPrefix) - const allCompactedMessages = splitTurn - ? [...toSummarize, ...splitTurn.splitPrefix] - : toSummarize; - const failures = collectToolFailures(allCompactedMessages); - const fileOps = collectFileOperations(allCompactedMessages); - - finalSummary += formatToolFailuresSection(failures); - finalSummary += formatFileOperationsSection(fileOps); - - // Create summary message - const summaryMessage = createSummaryMessage(finalSummary, previousSummary); - - // Combine results - const kept = [summaryMessage, ...toKeep]; - - const tokensRemoved = estimateMessagesTokens(allCompactedMessages); - const tokensKept = estimateMessagesTokens(kept); - - return { - kept, - removedCount: allCompactedMessages.length, - tokensRemoved, - tokensKept, - summary: finalSummary, - reason: "summary", - fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined, - toolFailures: failures.length > 0 ? failures : undefined, - }; -} - /** * Generate summary in chunks (for very large history) * diff --git a/packages/core/src/agent/session/compaction.test.ts b/packages/core/src/agent/session/compaction.test.ts index e3087816..b5cf3d70 100644 --- a/packages/core/src/agent/session/compaction.test.ts +++ b/packages/core/src/agent/session/compaction.test.ts @@ -1,6 +1,5 @@ import { describe, it, expect, vi } from "vitest"; import { - compactMessagesByCount, compactMessagesByTokens, compactMessages, type CompactionResult, @@ -27,96 +26,6 @@ describe("compaction", () => { })) as AgentMessage[]; } - function createMessagesWithToolUse(): AgentMessage[] { - return [ - { role: "user", content: "Start" }, - { - role: "assistant", - content: [{ type: "tool_use", id: "tool-1", name: "test", input: {} }], - } as any, - { - role: "user", - content: [{ type: "tool_result", tool_use_id: "tool-1", content: "Result" }], - } as any, - { role: "assistant", content: "Done" }, - { role: "user", content: "Next message" }, - ]; - } - - describe("compactMessagesByCount", () => { - it("should return null when under max messages", () => { - const messages = createMessages(50); - const result = compactMessagesByCount(messages, 80, 60); - expect(result).toBeNull(); - }); - - it("should compact when over max messages", () => { - const messages = createMessages(100); - const result = compactMessagesByCount(messages, 80, 60); - - expect(result).not.toBeNull(); - expect(result!.reason).toBe("count"); - expect(result!.kept.length).toBeLessThanOrEqual(100); - expect(result!.removedCount).toBeGreaterThan(0); - }); - - it("should keep the specified number of last messages", () => { - const messages = createMessages(100); - const result = compactMessagesByCount(messages, 80, 50); - - if (result) { - // Should keep approximately keepLast messages - expect(result.kept.length).toBeGreaterThanOrEqual(40); - expect(result.kept.length).toBeLessThanOrEqual(60); - } - }); - - it("should return null when exact at max messages", () => { - const messages = createMessages(80); - const result = compactMessagesByCount(messages, 80, 60); - expect(result).toBeNull(); - }); - - it("should not break tool_use/tool_result pairs", () => { - // Create many messages followed by a tool pair - const regularMessages = createMessages(70); - const toolMessages = createMessagesWithToolUse(); - const messages = [...regularMessages, ...toolMessages]; - - const result = compactMessagesByCount(messages, 80, 20); - - if (result) { - // Check that we didn't end up with orphaned tool_result - let hasOrphanedToolResult = false; - for (let i = 0; i < result.kept.length; i++) { - const msg = result.kept[i] as any; - if (Array.isArray(msg.content)) { - const hasToolResult = msg.content.some((b: any) => b.type === "tool_result"); - if (hasToolResult) { - // Check if previous message has corresponding tool_use - const prevMsg = result.kept[i - 1] as any; - if (!prevMsg || !Array.isArray(prevMsg.content)) { - hasOrphanedToolResult = true; - } - } - } - } - // This test verifies the safe compaction point logic - // The exact behavior depends on findSafeCompactionPoint implementation - } - }); - - it("should return null when would keep almost all messages", () => { - const messages = createMessages(85); - const result = compactMessagesByCount(messages, 80, 82); - - // If we'd only remove 2-3 messages, should return null - if (result) { - expect(result.removedCount).toBeGreaterThan(2); - } - }); - }); - describe("compactMessagesByTokens", () => { it("should return null when under token limit", () => { const messages = createMessages(5); @@ -161,41 +70,16 @@ describe("compaction", () => { }); describe("compactMessages (unified entry point)", () => { - describe("count mode", () => { - it("should use count-based compaction", () => { - const messages = createMessages(100); - const result = compactMessages(messages, { - mode: "count", - maxMessages: 80, - keepLast: 60, - }); - - expect(result).not.toBeNull(); - expect(result!.reason).toBe("count"); - }); - - it("should use default max and keep values", () => { - const messages = createMessages(100); - const result = compactMessages(messages, { - mode: "count", - }); - - // Default: maxMessages: 80, keepLast: 60 - expect(result).not.toBeNull(); - expect(result!.reason).toBe("count"); - }); - }); - describe("tokens mode", () => { it("should use token-based compaction when utilization is high", () => { const messages = createMessages(100); // ~300 message tokens (real estimator: ~3 tokens/msg) - // systemPromptTokens ≈ 7, reserveTokens = 0 - // available = 500 - 7 = 493 - // utilization = (300 * 1.5) / 493 ≈ 0.91 > 0.8 → should compact + // systemPromptTokens ≈ 4, reserveTokens = 0 + // available = 400 - 4 = 396 + // utilization = (300 * 1.2) / 396 ≈ 0.91 > 0.8 → should compact const result = compactMessages(messages, { mode: "tokens", - contextWindowTokens: 500, + contextWindowTokens: 400, systemPrompt: "System prompt", reserveTokens: 0, }); @@ -207,8 +91,8 @@ describe("compaction", () => { it("should return null when utilization is low", () => { const messages = createMessages(5); // ~15 message tokens - // available = 10000 - 7 - 1024 = 8969 - // utilization = (15 * 1.5) / 8969 ≈ 0.003 < 0.8 + // available = 10000 - 4 - 1024 = 8972 + // utilization = (15 * 1.2) / 8972 ≈ 0.002 < 0.8 const result = compactMessages(messages, { mode: "tokens", contextWindowTokens: 10000, diff --git a/packages/core/src/agent/session/compaction.ts b/packages/core/src/agent/session/compaction.ts index 3651f972..5543588b 100644 --- a/packages/core/src/agent/session/compaction.ts +++ b/packages/core/src/agent/session/compaction.ts @@ -1,11 +1,9 @@ import type { AgentMessage } from "@mariozechner/pi-agent-core"; import type { Model } from "@mariozechner/pi-ai"; import { - estimateMessagesTokens, compactMessagesTokenAware, estimateTokenUsage, shouldCompact as shouldCompactTokens, - compactMessagesWithSummary, compactMessagesWithChunkedSummary, COMPACTION_TARGET_RATIO, MIN_KEEP_MESSAGES, @@ -30,96 +28,12 @@ export type CompactionResult = { fileOperations?: { readFiles: string[]; modifiedFiles: string[] } | undefined; /** Tool failures extracted from compacted messages */ toolFailures?: Array<{ toolName: string; summary: string }> | undefined; - /** Reason for compaction: count, tokens, summary, or pruning (tool result trimming only) */ - reason: "count" | "tokens" | "summary" | "pruning"; + /** Reason for compaction: tokens, summary, or pruning (tool result trimming only) */ + reason: "tokens" | "summary" | "pruning"; /** Tool result pruning statistics (when Phase 1 pruning was applied) */ pruningStats?: PruningStats | undefined; }; -/** - * Find a safe compaction point that doesn't break tool_use/tool_result pairs. - * Returns the index to start keeping messages from. - */ -function findSafeCompactionPoint(messages: AgentMessage[], targetStart: number): number { - let start = targetStart; - - // Move forward until we find a safe starting point - while (start < messages.length) { - const msg = messages[start]; - if (!msg) { - start++; - continue; - } - - // Safe to start from a user message - if (msg.role === "user") { - // But make sure it's not a toolResult without corresponding tool_use - const msgAny = msg as any; - if (Array.isArray(msgAny.content)) { - const hasToolResult = msgAny.content.some((b: any) => b.type === "tool_result"); - if (!hasToolResult) { - break; // Safe: user message without tool_result - } - } else { - break; // Safe: simple user message - } - } - - // toolResult messages need their corresponding tool_use, skip them - // assistant messages are ok to start from if they don't reference missing tool calls - if (msg.role === "assistant") { - // Check if previous messages have the required tool_use for any following tool_result - const nextMsg = messages[start + 1]; - if (nextMsg && nextMsg.role === "user") { - const nextAny = nextMsg as any; - if (Array.isArray(nextAny.content)) { - const hasToolResult = nextAny.content.some((b: any) => b.type === "tool_result"); - if (hasToolResult) { - // This assistant message has tool_use that's needed by next message - break; - } - } - } - } - - start++; - } - - return start; -} - -/** - * Simple compression based on message count (legacy logic, maintains backward compatibility) - */ -export function compactMessagesByCount( - messages: AgentMessage[], - maxMessages: number, - keepLast: number, -): CompactionResult | null { - if (messages.length <= maxMessages) return null; - - const targetStart = messages.length - keepLast; - const safeStart = findSafeCompactionPoint(messages, targetStart); - - // If we can't find a safe point, don't compact - if (safeStart >= messages.length) { - return null; - } - - const kept = messages.slice(safeStart); - - // Don't compact if we'd keep almost everything anyway - if (kept.length >= messages.length - 2) { - return null; - } - - return { - kept, - removedCount: messages.length - kept.length, - reason: "count", - }; -} - /** * Token-based intelligent compression */ @@ -143,13 +57,9 @@ export function compactMessagesByTokens( }; } -/** Synchronous compaction options (count/tokens modes) */ -export type SyncCompactionOptions = { - mode: "count" | "tokens"; - // count mode parameters - maxMessages?: number | undefined; - keepLast?: number | undefined; - // tokens mode parameters +/** Token-based compaction options */ +export type TokenCompactionOptions = { + mode: "tokens"; contextWindowTokens?: number | undefined; systemPrompt?: string | undefined; reserveTokens?: number | undefined; @@ -163,39 +73,28 @@ export type SummaryCompactionOptions = { // Required parameters model: Model; apiKey: string; - // tokens mode parameters (reused) + // Token parameters (reused) contextWindowTokens?: number | undefined; systemPrompt?: string | undefined; reserveTokens?: number | undefined; targetRatio?: number | undefined; minKeepMessages?: number | undefined; - // summary-specific parameters + // Summary-specific parameters customInstructions?: string | undefined; previousSummary?: string | undefined; signal?: AbortSignal | undefined; maxChunkTokens?: number | undefined; }; -export type CompactionOptions = SyncCompactionOptions | SummaryCompactionOptions; +export type CompactionOptions = TokenCompactionOptions | SummaryCompactionOptions; /** - * Unified compaction entry point (synchronous version, for count/tokens modes) - * - * Selects compaction strategy based on mode + * Synchronous token-based compaction */ export function compactMessages( messages: AgentMessage[], - options: SyncCompactionOptions, + options: TokenCompactionOptions, ): CompactionResult | null { - if (options.mode === "count") { - return compactMessagesByCount( - messages, - options.maxMessages ?? 80, - options.keepLast ?? 60, - ); - } - - // Token mode const contextWindowTokens = options.contextWindowTokens ?? 200_000; const estimation = estimateTokenUsage({ messages, @@ -204,7 +103,6 @@ export function compactMessages( reserveTokens: options.reserveTokens, }); - // 检查是否需要压缩 if (!shouldCompactTokens(estimation)) { return null; } diff --git a/packages/core/src/agent/session/session-manager.ts b/packages/core/src/agent/session/session-manager.ts index d11773c6..3b901fdd 100644 --- a/packages/core/src/agent/session/session-manager.ts +++ b/packages/core/src/agent/session/session-manager.ts @@ -33,12 +33,8 @@ export type SessionManagerOptions = { baseDir?: string | undefined; // Compaction mode configuration - /** Compaction mode: "count" uses message count, "tokens" uses token awareness, "summary" uses LLM summary */ - compactionMode?: "count" | "tokens" | "summary" | undefined; - - // Count mode parameters - maxMessages?: number | undefined; - keepLast?: number | undefined; + /** Compaction mode: "tokens" uses token awareness, "summary" uses LLM summary (default) */ + compactionMode?: "tokens" | "summary" | undefined; // Token mode parameters /** Context window token count */ @@ -61,7 +57,7 @@ export type SessionManagerOptions = { customInstructions?: string | undefined; // Tool result pruning - /** Whether to enable tool result pruning before compaction (default: true in tokens/summary mode) */ + /** Whether to enable tool result pruning before compaction (default: true) */ enableToolResultPruning?: boolean | undefined; /** Tool result pruning settings */ toolResultPruning?: Partial | undefined; @@ -74,10 +70,7 @@ export type SessionManagerOptions = { export class SessionManager { private readonly sessionId: string; private readonly baseDir: string | undefined; - private readonly compactionMode: "count" | "tokens" | "summary"; - // Count mode - private readonly maxMessages: number; - private readonly keepLast: number; + private readonly compactionMode: "tokens" | "summary"; // Token mode private readonly contextWindowTokens: number; private systemPrompt: string | undefined; @@ -105,10 +98,6 @@ export class SessionManager { // Compaction mode (default: summary with LLM-based summarization) this.compactionMode = options.compactionMode ?? "summary"; - // Count mode parameters - this.maxMessages = options.maxMessages ?? 80; - this.keepLast = options.keepLast ?? 60; - // Token mode parameters this.contextWindowTokens = options.contextWindowTokens ?? 200_000; this.systemPrompt = options.systemPrompt; @@ -121,10 +110,8 @@ export class SessionManager { this.apiKey = options.apiKey; this.customInstructions = options.customInstructions; - // Tool result pruning (enabled by default in tokens/summary mode) - this.enableToolResultPruning = - options.enableToolResultPruning ?? - (this.compactionMode === "tokens" || this.compactionMode === "summary"); + // Tool result pruning (enabled by default) + this.enableToolResultPruning = options.enableToolResultPruning ?? true; this.toolResultPruning = options.toolResultPruning; // Observability @@ -164,7 +151,7 @@ export class SessionManager { /** * Get current compaction mode */ - getCompactionMode(): "count" | "tokens" | "summary" { + getCompactionMode(): "tokens" | "summary" { return this.compactionMode; } @@ -264,10 +251,6 @@ export class SessionManager { /** Check whether compaction would trigger for the given messages (without executing it) */ needsCompaction(messages: AgentMessage[]): boolean { - if (this.compactionMode === "count") { - return messages.length > this.maxMessages; - } - // Token and summary modes use the same token-based threshold const estimation = estimateTokenUsage({ messages, systemPrompt: this.systemPrompt, @@ -376,12 +359,9 @@ export class SessionManager { } } } else { + // tokens mode result = compactMessages(workingMessages, { - mode: this.compactionMode, - // Count mode parameters - maxMessages: this.maxMessages, - keepLast: this.keepLast, - // Token mode parameters + mode: "tokens", contextWindowTokens: this.contextWindowTokens, systemPrompt: this.systemPrompt, reserveTokens: this.reserveTokens, diff --git a/packages/core/src/agent/types.ts b/packages/core/src/agent/types.ts index 3db04fca..7663bba6 100644 --- a/packages/core/src/agent/types.ts +++ b/packages/core/src/agent/types.ts @@ -50,11 +50,10 @@ export type AgentOptions = { reserveTokens?: number | undefined; /** * Compaction mode: - * - "count": uses legacy message count * - "tokens": uses token awareness * - "summary": uses LLM to generate summary (default) */ - compactionMode?: "count" | "tokens" | "summary" | undefined; + compactionMode?: "tokens" | "summary" | undefined; /** Compaction target utilization ratio (0-1), defaults to 0.5 */ compactionTargetRatio?: number | undefined; /** Minimum messages to keep, defaults to 10 */