refactor(compaction): remove pre-flight tool result pruning

Pre-flight compaction runs in memory only (its output is not persisted), so
pruning tool results in this path was wasted work — the pruned messages were
discarded after the LLM call. Post-turn compaction still prunes tool results
and persists them. Only Phase 2 (emergency message drop) remains in
pre-flight, as a safety net.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Jiayuan Zhang 2026-02-15 21:37:15 +08:00
parent fbb0b11c6e
commit 92cf312843

View file

@ -27,9 +27,6 @@ import {
compactMessagesTokenAware,
MIN_KEEP_MESSAGES,
} from "./context-window/index.js";
import {
pruneToolResults,
} from "./context-window/tool-result-pruning.js";
import { mergeToolsConfig, type ToolsConfig } from "./tools/policy.js";
import {
loadAuthProfileStore,
@ -866,37 +863,12 @@ export class Agent {
const originalCount = messages.length;
let result = messages;
// Phase 1: Prune tool results (soft trim + hard clear)
const pruneResult = pruneToolResults({
messages: result,
contextWindowTokens: this.contextWindowGuard.tokens,
});
if (pruneResult.changed) {
result = pruneResult.messages;
if (pruneResult.softTrimmed > 0 || pruneResult.hardCleared > 0) {
this.runLog.log("tool_result_pruning", {
soft_trimmed: pruneResult.softTrimmed,
hard_cleared: pruneResult.hardCleared,
chars_saved: pruneResult.charsSaved,
phase: "preflight",
});
}
}
// Re-estimate after pruning
const afterPrune = estimateTokenUsage({
messages: result,
systemPrompt: this.agent.state.systemPrompt,
contextWindowTokens: this.contextWindowGuard.tokens,
reserveTokens: this.reserveTokens,
});
// Phase 2: Drop oldest messages if still over threshold
if (afterPrune.utilizationRatio >= COMPACTION_TRIGGER_RATIO) {
const compacted = compactMessagesTokenAware(result, afterPrune.availableTokens);
if (compacted) {
result = compacted.kept;
}
// Drop oldest messages if over threshold (emergency safety net).
// Tool result pruning is skipped here — it's handled by post-turn
// compaction which actually persists the results.
const compacted = compactMessagesTokenAware(result, estimation.availableTokens);
if (compacted) {
result = compacted.kept;
}
if (result.length < originalCount) {