feat(compaction): add metadata extraction and multi-level summary fallback

New modules for improved compaction:

- compaction-metadata.ts: extracts file operations (read/modified) and tool failures from compacted messages; these are appended to summaries for context retention
- summary-fallback.ts: 3-level degradation chain (full LLM summary → filtered summary excluding oversized messages → plain-text fallback with metadata)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-13 13:26:22 +08:00 · 2026-02-13 13:26:22 +08:00 · ba44de89b7
commit ba44de89b7
parent 7acf4cc4a5
2 changed files with 284 additions and 0 deletions
--- a/packages/core/src/agent/context-window/compaction-metadata.ts
+++ b/packages/core/src/agent/context-window/compaction-metadata.ts
@ -0,0 +1,156 @@
+/**
+ * Compaction Metadata — extract file operations & tool failures from compacted messages
+ *
+ * Appended to summaries so the agent retains awareness of what files were touched
+ * and which tool invocations failed, even after the original messages are removed.
+ */
+
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+
+// ── Types ──────────────────────────────────────────────────────────────────
+
+/** One failed tool invocation, as produced by collectToolFailures(). */
+export type ToolFailure = {
+  /** Name of the tool that failed; "unknown" when no matching tool_use block was found. */
+  toolName: string;
+  /** Error text taken from the failing tool_result block, truncated when long. */
+  summary: string;
+};
+
+/** File paths touched by tool calls, as produced by collectFileOperations(). */
+export type FileOperations = {
+  /** Paths passed to read tools; paths that were also modified are left out. */
+  readFiles: string[];
+  /** Paths passed to write/edit tools. */
+  modifiedFiles: string[];
+};
+
+// ── Tool failure extraction ────────────────────────────────────────────────
+
+/** Upper bound on the number of failures returned by collectToolFailures(). */
+const MAX_TOOL_FAILURES = 8;
+/** Maximum length (in UTF-16 code units) of a failure summary before "..." is appended. */
+const ERROR_SUMMARY_MAX_LEN = 240;
+
+/**
+ * Flatten the `content` of a tool_result block into one line of text.
+ *
+ * Accepts a plain string, an array of strings / `{ text }` parts, or anything
+ * else (stringified). Array entries without a string `text` contribute nothing.
+ * All whitespace runs (including newlines) collapse to a single space so the
+ * result fits on one markdown bullet.
+ */
+function toolResultText(content: unknown): string {
+  let raw: string;
+  if (typeof content === "string") {
+    raw = content;
+  } else if (Array.isArray(content)) {
+    raw = content
+      .map((part: unknown) => {
+        if (typeof part === "string") return part;
+        // `part` may be null or a non-text block (e.g. an image); optional chaining keeps this safe.
+        const text = (part as { text?: unknown } | null | undefined)?.text;
+        return typeof text === "string" ? text : "";
+      })
+      .join(" ");
+  } else {
+    raw = String(content ?? "");
+  }
+  return raw.replace(/\s+/g, " ").trim();
+}
+
+/**
+ * Cap `text` at ERROR_SUMMARY_MAX_LEN code units, appending "..." when cut.
+ * The cut point moves back by one if it would otherwise split a surrogate pair.
+ */
+function truncateSummary(text: string): string {
+  if (text.length <= ERROR_SUMMARY_MAX_LEN) return text;
+  let end = ERROR_SUMMARY_MAX_LEN;
+  const lastUnit = text.charCodeAt(end - 1);
+  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) end -= 1;
+  return text.slice(0, end) + "...";
+}
+
+/**
+ * Collect tool failures (is_error: true tool_result blocks) from messages.
+ *
+ * Tool names are resolved from assistant `tool_use` blocks anywhere in
+ * `messages`; a failure whose id has no matching tool_use is reported as
+ * "unknown". Results are deduplicated by tool call id (`tool_use_id`, falling
+ * back to `id`); failing blocks without any id are skipped. Collection stops
+ * once MAX_TOOL_FAILURES entries have been gathered.
+ *
+ * @param messages Messages to scan; only array-valued `content` is inspected.
+ * @returns Failures in the order encountered, at most MAX_TOOL_FAILURES.
+ */
+export function collectToolFailures(messages: AgentMessage[]): ToolFailure[] {
+  // First pass: collect tool_use names keyed by id
+  const toolNameById = new Map<string, string>();
+  for (const msg of messages) {
+    if (msg.role !== "assistant") continue;
+    const content = (msg as any).content;
+    if (!Array.isArray(content)) continue;
+    for (const block of content) {
+      if (block?.type === "tool_use" && block.id && block.name) {
+        toolNameById.set(block.id, block.name);
+      }
+    }
+  }
+
+  // Second pass: find is_error tool_result blocks
+  const seen = new Set<string>();
+  const failures: ToolFailure[] = [];
+  for (const msg of messages) {
+    if (msg.role !== "user") continue;
+    const content = (msg as any).content;
+    if (!Array.isArray(content)) continue;
+    for (const block of content) {
+      // `block?.` guards against null/undefined entries in the content array.
+      if (block?.type !== "tool_result" || !block.is_error) continue;
+
+      const toolCallId: string = block.tool_use_id ?? block.id ?? "";
+      if (!toolCallId || seen.has(toolCallId)) continue;
+      seen.add(toolCallId);
+
+      failures.push({
+        toolName: toolNameById.get(toolCallId) ?? "unknown",
+        summary: truncateSummary(toolResultText(block.content)),
+      });
+      if (failures.length >= MAX_TOOL_FAILURES) return failures;
+    }
+  }
+
+  return failures;
+}
+
+// ── File operation extraction ──────────────────────────────────────────────
+
+/** Tool names treated as file reads. */
+const READ_TOOL_NAMES = new Set(["Read", "read_file"]);
+/** Tool names treated as file modifications. */
+const WRITE_TOOL_NAMES = new Set(["Write", "Edit", "write_file", "file_edit"]);
+/** Input parameter names checked, in priority order, for a tool call's target path. */
+const FILE_PATH_PARAM_NAMES = ["file_path", "path", "filePath", "filename"] as const;
+
+/**
+ * Collect file read/modify operations from assistant tool_use blocks.
+ *
+ * For each tool_use block whose name is a known read or write tool, the target
+ * path is the first non-empty string found among FILE_PATH_PARAM_NAMES in the
+ * block's input. An empty or non-string value under an earlier name does not
+ * hide a valid path under a later one. Blocks without a usable path are ignored.
+ *
+ * @param messages Messages to scan; only assistant messages with array content are inspected.
+ * @returns De-duplicated paths in first-seen order. readFiles excludes any path
+ *          that also appears in modifiedFiles.
+ */
+export function collectFileOperations(messages: AgentMessage[]): FileOperations {
+  const readSet = new Set<string>();
+  const modifiedSet = new Set<string>();
+
+  for (const msg of messages) {
+    if (msg.role !== "assistant") continue;
+    const content = (msg as any).content;
+    if (!Array.isArray(content)) continue;
+
+    for (const block of content) {
+      // `block?.` guards against null/undefined entries in the content array.
+      if (block?.type !== "tool_use") continue;
+
+      const name = typeof block.name === "string" ? block.name : "";
+      const isRead = READ_TOOL_NAMES.has(name);
+      if (!isRead && !WRITE_TOOL_NAMES.has(name)) continue;
+
+      const input: unknown = block.input;
+      if (input === null || typeof input !== "object") continue;
+      const params = input as Record<string, unknown>;
+
+      const filePath = FILE_PATH_PARAM_NAMES
+        .map((key) => params[key])
+        .find((value): value is string => typeof value === "string" && value.length > 0);
+      if (filePath === undefined) continue;
+
+      (isRead ? readSet : modifiedSet).add(filePath);
+    }
+  }
+
+  return {
+    // A file that was both read and modified is reported only as modified.
+    readFiles: [...readSet].filter((p) => !modifiedSet.has(p)),
+    modifiedFiles: [...modifiedSet],
+  };
+}
+
+// ── Formatting ─────────────────────────────────────────────────────────────
+
+/**
+ * Render tool failures as a "## Tool Failures" markdown section, one bullet
+ * per failure in the form `- **toolName**: summary`.
+ *
+ * The section starts with a newline so it can be appended directly to a summary.
+ *
+ * @param failures Failures to render.
+ * @returns The section text, or "" when `failures` is empty.
+ */
+export function formatToolFailuresSection(failures: ToolFailure[]): string {
+  if (!failures.length) {
+    return "";
+  }
+
+  let section = "\n## Tool Failures";
+  for (const { toolName, summary } of failures) {
+    section += `\n- **${toolName}**: ${summary}`;
+  }
+  return section;
+}
+
+/**
+ * Render file operations as `<read-files>` / `<modified-files>` tagged blocks,
+ * one path per line. A block is emitted only for a non-empty list.
+ *
+ * The result starts with a newline so it can be appended directly to a summary.
+ *
+ * @param ops Read and modified path lists.
+ * @returns The tagged blocks, or "" when both lists are empty.
+ */
+export function formatFileOperationsSection(ops: FileOperations): string {
+  const { readFiles, modifiedFiles } = ops;
+  const sections: string[] = [];
+
+  if (readFiles.length > 0) {
+    const body = readFiles.join("\n");
+    sections.push(`<read-files>\n${body}\n</read-files>`);
+  }
+  if (modifiedFiles.length > 0) {
+    const body = modifiedFiles.join("\n");
+    sections.push(`<modified-files>\n${body}\n</modified-files>`);
+  }
+
+  if (sections.length === 0) {
+    return "";
+  }
+  return "\n" + sections.join("\n");
+}
--- a/packages/core/src/agent/context-window/summary-fallback.ts
+++ b/packages/core/src/agent/context-window/summary-fallback.ts
@ -0,0 +1,128 @@
+/**
+ * Summary Fallback — multi-level degradation for summary compaction
+ *
+ * Level 1: Full LLM summary via generateSummary()
+ * Level 2: Exclude oversized messages (> 50% context window), retry summary
+ * Level 3: Plain-text fallback summary (with metadata: file ops + tool failures)
+ */
+
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
+import type { Model } from "@mariozechner/pi-ai";
+import {
+  collectToolFailures,
+  collectFileOperations,
+  formatToolFailuresSection,
+  formatFileOperationsSection,
+} from "./compaction-metadata.js";
+
+/** Inputs for summarizeWithFallback(). */
+export type SummarizeWithFallbackParams = {
+  /** Messages to summarize; also the source for metadata in the plain-text fallback */
+  messages: AgentMessage[];
+  /** LLM model, forwarded to generateSummary() */
+  model: Model<any>;
+  /** Max tokens reserved for summary output, forwarded to generateSummary() */
+  reserveTokens: number;
+  /** API key, forwarded to generateSummary() */
+  apiKey: string;
+  /** Optional AbortSignal, forwarded to generateSummary() */
+  signal?: AbortSignal | undefined;
+  /** Summary instructions, forwarded to generateSummary() */
+  instructions: string;
+  /** Previous summary for incremental context; also included in the plain-text fallback */
+  previousSummary?: string | undefined;
+  /** Available context window tokens; messages estimated above 50% of this are dropped for the filtered retry */
+  availableTokens: number;
+};
+
+/**
+ * Attempt to generate an LLM summary with multi-level fallback.
+ *
+ * Level 1 summarizes all messages. If that fails, Level 2 retries with every
+ * message whose estimated token count exceeds 50% of `availableTokens`
+ * removed. It is attempted only when that filtering drops some messages but
+ * not all of them, and only when `signal` has not been aborted, since a retry
+ * after cancellation cannot succeed. If Level 2 is skipped or fails, Level 3
+ * builds a plain-text summary from extracted metadata without calling the LLM.
+ *
+ * Failures at Levels 1 and 2 (including a failure while estimating token
+ * counts) are logged with console.warn and are not rethrown.
+ *
+ * @param params See SummarizeWithFallbackParams.
+ * @returns { summary, level } where level indicates which tier produced the summary:
+ *   1 = full summary, 2 = filtered summary, 3 = plain-text fallback
+ */
+export async function summarizeWithFallback(
+  params: SummarizeWithFallbackParams,
+): Promise<{ summary: string; level: 1 | 2 | 3 }> {
+  const {
+    messages,
+    model,
+    reserveTokens,
+    apiKey,
+    signal,
+    instructions,
+    previousSummary,
+    availableTokens,
+  } = params;
+
+  // Single place for the LLM call so both tiers pass identical arguments.
+  const summarize = (subset: AgentMessage[]): Promise<string> =>
+    generateSummary(
+      subset,
+      model,
+      reserveTokens,
+      apiKey,
+      signal,
+      instructions,
+      previousSummary,
+    );
+
+  // ── Level 1: Full summary ────────────────────────────────────────────
+  try {
+    const summary = await summarize(messages);
+    return { summary, level: 1 };
+  } catch (err) {
+    console.warn(`[summary-fallback] Level 1 (full summary) failed: ${err}`);
+  }
+
+  // ── Level 2: Exclude oversized messages, retry ───────────────────────
+  // Skipped after cancellation: the same aborted signal would be passed again.
+  if (!signal?.aborted) {
+    try {
+      // Token estimation sits inside the try so an estimator failure degrades
+      // to Level 3 instead of rejecting the whole fallback chain.
+      const oversizeThreshold = availableTokens * 0.5;
+      const filtered = messages.filter((msg) => estimateTokens(msg) <= oversizeThreshold);
+
+      if (filtered.length > 0 && filtered.length < messages.length) {
+        const summary = await summarize(filtered);
+        return { summary, level: 2 };
+      }
+    } catch (err) {
+      console.warn(`[summary-fallback] Level 2 (filtered summary) failed: ${err}`);
+    }
+  }
+
+  // ── Level 3: Plain-text fallback with metadata ───────────────────────
+  const summary = buildPlainTextFallback(messages, previousSummary);
+  return { summary, level: 3 };
+}
+
+/**
+ * Build a plain-text fallback summary from metadata extraction only (no LLM).
+ *
+ * The result consists of, in order and separated by one blank line: the
+ * previous summary (when non-empty), a note stating that LLM summarization was
+ * unavailable and how many messages were compacted, the tool-failure section,
+ * and the file-operations section. Empty metadata sections are omitted.
+ *
+ * @param messages Messages being compacted; source of the extracted metadata.
+ * @param previousSummary Earlier summary to carry forward, if any.
+ * @returns The assembled fallback summary text.
+ */
+function buildPlainTextFallback(
+  messages: AgentMessage[],
+  previousSummary?: string,
+): string {
+  const parts: string[] = [];
+
+  if (previousSummary) {
+    parts.push(`## Previous Context\n${previousSummary}`);
+  }
+
+  parts.push(
+    `## Compaction Note\nLLM summarization was unavailable. ${messages.length} messages were compacted. ` +
+    `Below is automatically extracted metadata from the removed messages.`,
+  );
+
+  // Extract and append metadata
+  const metadataSections = [
+    formatToolFailuresSection(collectToolFailures(messages)),
+    formatFileOperationsSection(collectFileOperations(messages)),
+  ];
+  for (const section of metadataSections) {
+    // The formatters prefix their output with "\n" for direct concatenation.
+    // Parts here are already joined with a blank line, so drop that prefix to
+    // avoid three consecutive newlines.
+    const body = section.replace(/^\n+/, "");
+    if (body) parts.push(body);
+  }
+
+  return parts.join("\n\n");
+}