fix(compaction): prevent double-counting in split-turn detection

- detectSplitTurn now returns adjustedToSummarize (truncated at assistantIndex) so split prefix messages are not summarized twice and removedCount is not inflated - Remove unused contextWindow parameter from computeAdaptiveChunkRatio - Remove duplicated single-chunk fast path in compactMessagesWithChunkedSummary (the multi-chunk loop handles 1 chunk naturally) - Fix triple-newline formatting in buildPlainTextFallback by concatenating metadata sections directly instead of joining through parts array Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 13:33:31 +08:00 · 2026-02-13 13:33:31 +08:00 · d7e85d0c25
commit d7e85d0c25
parent 9e7907a814
2 changed files with 48 additions and 86 deletions
--- a/packages/core/src/agent/context-window/summarization.ts
+++ b/packages/core/src/agent/context-window/summarization.ts
@ -133,17 +133,20 @@ export function splitMessagesForSummary(
 * Detect and fix a "split turn" — when the first kept message is a user message
 * containing tool_result blocks without the corresponding assistant tool_use.
 *
- * When detected, moves the orphaned tool_result (and its preceding assistant
- * tool_use) from toKeep back to toSummarize so they can be summarized together.
+ * When detected, separates the orphaned turn (assistant tool_use + user tool_result)
+ * into a `splitPrefix` for separate summarization, and returns adjusted arrays
+ * where `toSummarize` no longer contains those messages.
 *
- * Returns { splitPrefix } containing the separated turn prefix messages,
- * or null if no split turn was detected. The caller should summarize the
- * prefix separately and prepend it.
+ * Returns null if no split turn was detected.
 */
 export function detectSplitTurn(
  toSummarize: AgentMessage[],
  toKeep: AgentMessage[],
-): { splitPrefix: AgentMessage[]; adjustedToKeep: AgentMessage[] } | null {
+): {
+  splitPrefix: AgentMessage[];
+  adjustedToSummarize: AgentMessage[];
+  adjustedToKeep: AgentMessage[];
+} | null {
  if (toKeep.length === 0) return null;

  const firstKept = toKeep[0]!;
@ -157,7 +160,6 @@ export function detectSplitTurn(
  if (!hasToolResult) return null;

  // This is an orphaned tool_result — look back in toSummarize for the assistant tool_use
-  // Find the last assistant message in toSummarize that has matching tool_use
  const toolResultIds = new Set(
    content
      .filter((b: any) => b.type === "tool_result")
@ -185,14 +187,16 @@ export function detectSplitTurn(

  if (assistantIndex < 0) return null;

-  // Separate the split prefix: assistant tool_use + orphaned user tool_result
+  // Split prefix: messages from assistantIndex to end of toSummarize + orphaned firstKept
  const splitPrefix = [
    ...toSummarize.slice(assistantIndex),
    firstKept,
  ];
+  // Truncate toSummarize so the split prefix messages are NOT double-counted
+  const adjustedToSummarize = toSummarize.slice(0, assistantIndex);
  const adjustedToKeep = toKeep.slice(1);

-  return { splitPrefix, adjustedToKeep };
+  return { splitPrefix, adjustedToSummarize, adjustedToKeep };
 }

 // ── Adaptive Chunk Ratio ───────────────────────────────────────────────────
@ -201,13 +205,12 @@ const ADAPTIVE_CHUNK_MIN = 0.15;
 const ADAPTIVE_CHUNK_MAX = 0.4;

 /**
- * Compute adaptive chunk size based on average message token count.
- * Larger average messages → smaller chunks (to avoid exceeding limits).
- * Range: [0.15, 0.4] × contextWindow
+ * Compute adaptive chunk ratio based on average message token count.
+ * Larger average messages → smaller ratio (to avoid exceeding limits).
+ * Return value range: [0.15, 0.4] — multiply by availableTokens to get chunk size.
 */
 export function computeAdaptiveChunkRatio(
  messages: AgentMessage[],
-  contextWindow: number,
 ): number {
  if (messages.length === 0) return ADAPTIVE_CHUNK_MAX;

@ -273,6 +276,7 @@ export async function compactMessagesWithSummary(
  let splitPrefixSummary = "";

  if (splitTurn) {
+    toSummarize = splitTurn.adjustedToSummarize;
    toKeep = splitTurn.adjustedToKeep;

    // Summarize the split prefix separately
@ -290,26 +294,30 @@ export async function compactMessagesWithSummary(
    splitPrefixSummary = prefixResult.summary;
  }

-  // Generate summary with fallback
+  // Generate summary with fallback (toSummarize no longer contains split prefix messages)
  const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
-  const { summary } = await summarizeWithFallback({
-    messages: toSummarize,
-    model,
-    reserveTokens,
-    apiKey,
-    signal,
-    instructions,
-    previousSummary,
-    availableTokens,
-  });
+  let finalSummary = "";

-  // Append split prefix summary if present
-  let finalSummary = summary;
-  if (splitPrefixSummary) {
-    finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`;
+  if (toSummarize.length > 0) {
+    const { summary } = await summarizeWithFallback({
+      messages: toSummarize,
+      model,
+      reserveTokens,
+      apiKey,
+      signal,
+      instructions,
+      previousSummary,
+      availableTokens,
+    });
+    finalSummary = summary;
  }

-  // Append metadata sections
+  // Append split prefix summary if present
+  if (splitPrefixSummary) {
+    finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`;
+  }
+
+  // Append metadata sections (all compacted = adjusted toSummarize + splitPrefix)
  const allCompactedMessages = splitTurn
    ? [...toSummarize, ...splitTurn.splitPrefix]
    : toSummarize;
@ -381,6 +389,7 @@ export async function compactMessagesWithChunkedSummary(
  let splitPrefixSummary = "";

  if (splitTurn) {
+    toSummarize = splitTurn.adjustedToSummarize;
    toKeep = splitTurn.adjustedToKeep;

    // Summarize the split prefix separately
@ -399,57 +408,10 @@ export async function compactMessagesWithChunkedSummary(
  }

  // Compute adaptive chunk size
-  const chunkRatio = computeAdaptiveChunkRatio(toSummarize, availableTokens);
+  const chunkRatio = computeAdaptiveChunkRatio(toSummarize);
  const maxChunkTokens = params.maxChunkTokens ?? Math.floor(availableTokens * chunkRatio);

-  // If messages to summarize fit in one chunk, delegate to single-pass compaction
-  const toSummarizeTokens = estimateMessagesTokens(toSummarize);
-  if (toSummarizeTokens <= maxChunkTokens) {
-    // For single-chunk, use the non-chunked path but still handle split turn
-    const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
-    const { summary } = await summarizeWithFallback({
-      messages: toSummarize,
-      model,
-      reserveTokens,
-      apiKey,
-      signal,
-      instructions,
-      previousSummary,
-      availableTokens,
-    });
-
-    let finalSummary = summary;
-    if (splitPrefixSummary) {
-      finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`;
-    }
-
-    // Append metadata
-    const allCompactedMessages = splitTurn
-      ? [...toSummarize, ...splitTurn.splitPrefix]
-      : toSummarize;
-    const failures = collectToolFailures(allCompactedMessages);
-    const fileOps = collectFileOperations(allCompactedMessages);
-    finalSummary += formatToolFailuresSection(failures);
-    finalSummary += formatFileOperationsSection(fileOps);
-
-    const summaryMessage = createSummaryMessage(finalSummary);
-    const kept = [summaryMessage, ...toKeep];
-    const tokensRemoved = estimateMessagesTokens(allCompactedMessages);
-    const tokensKept = estimateMessagesTokens(kept);
-
-    return {
-      kept,
-      removedCount: allCompactedMessages.length,
-      tokensRemoved,
-      tokensKept,
-      summary: finalSummary,
-      reason: "summary",
-      fileOperations: (fileOps.readFiles.length > 0 || fileOps.modifiedFiles.length > 0) ? fileOps : undefined,
-      toolFailures: failures.length > 0 ? failures : undefined,
-    };
-  }
-
-  // Process in chunks
+  // Process in chunks (works naturally for single-chunk case too)
  const chunks: AgentMessage[][] = [];
  let currentChunk: AgentMessage[] = [];
  let currentTokens = 0;
@ -496,10 +458,10 @@ export async function compactMessagesWithChunkedSummary(

  // Append split prefix summary if present
  if (splitPrefixSummary) {
-    finalSummary += `\n\n## Split Turn Context\n${splitPrefixSummary}`;
+    finalSummary += (finalSummary ? "\n\n" : "") + `## Split Turn Context\n${splitPrefixSummary}`;
  }

-  // Append metadata sections
+  // Append metadata sections (all compacted = adjusted toSummarize + splitPrefix)
  const allCompactedMessages = splitTurn
    ? [...toSummarize, ...splitTurn.splitPrefix]
    : toSummarize;
--- a/packages/core/src/agent/context-window/summary-fallback.ts
+++ b/packages/core/src/agent/context-window/summary-fallback.ts
@ -115,14 +115,14 @@ function buildPlainTextFallback(
    `Below is automatically extracted metadata from the removed messages.`,
  );

-  // Extract and append metadata
+  // Extract and append metadata (format functions return strings with leading \n,
+  // designed for direct concatenation — so we concatenate rather than join)
  const failures = collectToolFailures(messages);
  const fileOps = collectFileOperations(messages);
-  const failureSection = formatToolFailuresSection(failures);
-  const fileOpsSection = formatFileOperationsSection(fileOps);

-  if (failureSection) parts.push(failureSection);
-  if (fileOpsSection) parts.push(fileOpsSection);
+  let result = parts.join("\n\n");
+  result += formatToolFailuresSection(failures);
+  result += formatFileOperationsSection(fileOps);

-  return parts.join("\n\n");
+  return result;
 }