multica/src/agent/session/compaction.ts
Jiayuan 5931e8f84e
feat(agent): add auto-backgrounding and process management (#17)
* feat(agent): add auto-backgrounding to exec tool

- Add yieldMs parameter to exec tool (default 5s) - commands that don't
  complete within this time automatically run in background
- Create shared process-registry.ts for unified process management
- Refactor process.ts to use shared registry
- Add --debug CLI flag for session message logging
- Signal isolation: backgrounded processes ignore abort signals

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* fix(session): preserve tool_use/tool_result pairs during compaction

Previously, session compaction simply kept the last N messages, which
could break tool_use/tool_result pairs if the cut point fell between
them. This caused "tool_call_id is not found" errors from the API.

Now compaction finds a safe cut point that starts from either:
- A user message without tool_result
- An assistant message whose tool_use is needed by the next tool_result

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* feat(session): use Kimi as default model for summary compaction

- Auto-detect MOONSHOT_API_KEY from environment
- Use moonshot-v1-128k (cheaper than k2-thinking)
- Fall back to tokens mode if API key not available

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* docs: add rule to never use git commit --amend

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* docs: clarify git amend rule for immediate fixes

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-30 04:22:42 +08:00

253 lines
7.1 KiB
TypeScript

import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import {
estimateMessagesTokens,
compactMessagesTokenAware,
estimateTokenUsage,
shouldCompact as shouldCompactTokens,
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
COMPACTION_TARGET_RATIO,
MIN_KEEP_MESSAGES,
} from "../context-window/index.js";
/**
 * Outcome of a compaction pass, shared by all strategies in this module.
 * Functions here return `null` instead of a result when nothing was compacted.
 */
export type CompactionResult = {
/** Messages retained after compaction. */
kept: AgentMessage[];
/** Number of messages dropped from the input. */
removedCount: number;
/**
 * Token accounting, passed through from the underlying helper by the
 * "tokens" and "summary" strategies; not set by the "count" strategy.
 */
tokensRemoved?: number | undefined;
tokensKept?: number | undefined;
/** Summary generated in summary mode */
summary?: string | undefined;
/** Strategy that produced this result. */
reason: "count" | "tokens" | "summary";
};
/**
 * Locate the first index at or after `targetStart` from which the tail of
 * `messages` can be kept without separating a tool_result from the assistant
 * turn that precedes it.
 *
 * An index is accepted when it holds either:
 * - a `user` message whose content is not an array, or is an array that
 *   contains no `tool_result` block; or
 * - an `assistant` message immediately followed by a `user` message that
 *   carries a `tool_result` block (the pair is kept together).
 *
 * Everything else is stepped over: missing entries, `user` messages carrying
 * a `tool_result`, messages with any other role, and `assistant` messages
 * that are not followed by a tool_result carrier.
 *
 * NOTE(review): only content blocks with `type === "tool_result"` are
 * recognised — confirm this matches the block shape AgentMessage uses.
 *
 * @param messages - Message history to scan.
 * @param targetStart - Preferred index to start keeping messages from.
 * @returns The accepted index, or a value `>= messages.length` when no
 *   acceptable index exists.
 */
function findSafeCompactionPoint(messages: AgentMessage[], targetStart: number): number {
  // True when the message has array content that includes a tool_result block.
  const carriesToolResult = (candidate: unknown): boolean => {
    const content = (candidate as any).content;
    return Array.isArray(content) && content.some((block: any) => block.type === "tool_result");
  };

  let index = targetStart;
  for (; index < messages.length; index++) {
    const current = messages[index];
    if (!current) continue;

    // A user turn is a clean boundary unless it is answering a tool call.
    if (current.role === "user" && !carriesToolResult(current)) {
      return index;
    }

    // An assistant turn is a valid boundary only when the very next message
    // is the tool_result that depends on it.
    if (current.role === "assistant") {
      const following = messages[index + 1];
      if (following && following.role === "user" && carriesToolResult(following)) {
        return index;
      }
    }
  }
  return index;
}
/**
 * Count-based compaction (legacy strategy, kept for backward compatibility).
 *
 * When the history holds more than `maxMessages` entries, keeps roughly the
 * last `keepLast` messages; the cut is moved forward to the index chosen by
 * `findSafeCompactionPoint`, so fewer than `keepLast` may be kept.
 *
 * @param messages - Full message history.
 * @param maxMessages - Compaction is attempted only above this length.
 * @param keepLast - Desired number of trailing messages to retain.
 * @returns The compaction result, or `null` when the history is short enough,
 *   no safe cut exists, or the cut would drop two messages or fewer.
 */
export function compactMessagesByCount(
  messages: AgentMessage[],
  maxMessages: number,
  keepLast: number,
): CompactionResult | null {
  const total = messages.length;
  if (total <= maxMessages) {
    return null;
  }

  const cutIndex = findSafeCompactionPoint(messages, total - keepLast);
  if (cutIndex >= total) {
    // No safe boundary anywhere in the tail: leave the history untouched.
    return null;
  }

  const retained = messages.slice(cutIndex);
  if (retained.length >= total - 2) {
    // Dropping two messages or fewer is not worth a compaction.
    return null;
  }

  return {
    kept: retained,
    removedCount: total - retained.length,
    reason: "count",
  };
}
/**
 * Token-budget compaction.
 *
 * Delegates to `compactMessagesTokenAware` and tags its result with
 * `reason: "tokens"`.
 *
 * @param messages - Full message history.
 * @param availableTokens - Token budget passed to the token-aware helper.
 * @param options - Optional tuning values, passed through to the helper unchanged.
 * @returns The tagged compaction result, or `null` when the helper returns a
 *   falsy value.
 */
export function compactMessagesByTokens(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number;
    minKeepMessages?: number;
  },
): CompactionResult | null {
  const outcome = compactMessagesTokenAware(messages, availableTokens, options);
  if (!outcome) {
    return null;
  }

  const { kept, removedCount, tokensRemoved, tokensKept } = outcome;
  return { kept, removedCount, tokensRemoved, tokensKept, reason: "tokens" };
}
/**
 * Synchronous compaction options (count/tokens modes), consumed by
 * `compactMessages`. Fields belonging to the mode not selected are ignored.
 */
export type SyncCompactionOptions = {
/** Strategy selector: "count" uses message counts, "tokens" uses a token budget. */
mode: "count" | "tokens";
// count mode parameters
/** History length above which count compaction is attempted; `compactMessages` falls back to 80. */
maxMessages?: number | undefined;
/** Desired number of trailing messages to keep; `compactMessages` falls back to 60. */
keepLast?: number | undefined;
// tokens mode parameters
/** Context window size in tokens; `compactMessages` falls back to 200_000. */
contextWindowTokens?: number | undefined;
/** Forwarded to `estimateTokenUsage` as `systemPrompt`. */
systemPrompt?: string | undefined;
/** Forwarded to `estimateTokenUsage` as `reserveTokens`. */
reserveTokens?: number | undefined;
/** Falls back to `COMPACTION_TARGET_RATIO` when omitted. */
targetRatio?: number | undefined;
/** Falls back to `MIN_KEEP_MESSAGES` when omitted. */
minKeepMessages?: number | undefined;
};
/**
 * Summary compaction options (summary mode), consumed by
 * `compactMessagesAsync`.
 */
export type SummaryCompactionOptions = {
/** Discriminator for the summary strategy. */
mode: "summary";
// Required parameters
/** Model forwarded to `compactMessagesWithChunkedSummary`. */
model: Model<any>;
/** API key forwarded to `compactMessagesWithChunkedSummary`. */
apiKey: string;
// tokens mode parameters (reused)
/** Context window size in tokens; `compactMessagesAsync` falls back to 200_000. */
contextWindowTokens?: number | undefined;
/** Forwarded to `estimateTokenUsage` as `systemPrompt`. */
systemPrompt?: string | undefined;
/**
 * Forwarded as-is to `estimateTokenUsage`; the summary call receives this
 * value with a fallback of 2048.
 */
reserveTokens?: number | undefined;
/** Falls back to `COMPACTION_TARGET_RATIO` when omitted. */
targetRatio?: number | undefined;
/** Falls back to `MIN_KEEP_MESSAGES` when omitted. */
minKeepMessages?: number | undefined;
// summary-specific parameters
/** Forwarded unchanged to the summary helper. */
customInstructions?: string | undefined;
/** Forwarded unchanged to the summary helper. */
previousSummary?: string | undefined;
/** Forwarded unchanged to the summary helper — presumably used to cancel the request; verify in the helper. */
signal?: AbortSignal | undefined;
/** Forwarded unchanged to the summary helper. */
maxChunkTokens?: number | undefined;
};
/** Any compaction configuration: the synchronous (count/tokens) options or the summary options. */
export type CompactionOptions = SyncCompactionOptions | SummaryCompactionOptions;
/**
 * Unified synchronous compaction entry point (count and tokens modes).
 *
 * - `mode: "count"` delegates to `compactMessagesByCount`, defaulting
 *   `maxMessages` to 80 and `keepLast` to 60.
 * - Any other mode estimates token usage (context window defaulting to
 *   200_000 tokens) and, if `shouldCompactTokens` approves, delegates to
 *   `compactMessagesByTokens`.
 *
 * @param messages - Full message history.
 * @param options - Strategy selection and tuning values.
 * @returns The compaction result, or `null` when no compaction was performed.
 */
export function compactMessages(
  messages: AgentMessage[],
  options: SyncCompactionOptions,
): CompactionResult | null {
  if (options.mode !== "count") {
    // Token mode: measure current usage against the context window first.
    const usage = estimateTokenUsage({
      messages,
      systemPrompt: options.systemPrompt,
      contextWindowTokens: options.contextWindowTokens ?? 200_000,
      reserveTokens: options.reserveTokens,
    });

    // Check whether compaction is needed at all.
    if (!shouldCompactTokens(usage)) {
      return null;
    }

    const { availableTokens } = usage;
    const targetRatio = options.targetRatio ?? COMPACTION_TARGET_RATIO;
    const minKeepMessages = options.minKeepMessages ?? MIN_KEEP_MESSAGES;
    return compactMessagesByTokens(messages, availableTokens, { targetRatio, minKeepMessages });
  }

  // Count mode
  const maxMessages = options.maxMessages ?? 80;
  const keepLast = options.keepLast ?? 60;
  return compactMessagesByCount(messages, maxMessages, keepLast);
}
/**
 * Summary-based compaction (asynchronous).
 *
 * Estimates token usage (context window defaulting to 200_000 tokens) and,
 * if `shouldCompactTokens` approves, calls `compactMessagesWithChunkedSummary`
 * and tags its result with `reason: "summary"`.
 *
 * NOTE(review): `reserveTokens` falls back to 2048 only for the summary call;
 * the usage estimate receives the raw option — confirm this asymmetry is
 * intended.
 *
 * @param messages - Full message history.
 * @param options - Model, credentials and tuning values for summarisation.
 * @returns The compaction result including the generated summary, or `null`
 *   when compaction is not needed or the summary helper returns a falsy value.
 */
export async function compactMessagesAsync(
  messages: AgentMessage[],
  options: SummaryCompactionOptions,
): Promise<CompactionResult | null> {
  const usage = estimateTokenUsage({
    messages,
    systemPrompt: options.systemPrompt,
    contextWindowTokens: options.contextWindowTokens ?? 200_000,
    reserveTokens: options.reserveTokens,
  });

  // Skip the summary call entirely when the history still fits.
  if (!shouldCompactTokens(usage)) {
    return null;
  }

  // The chunked variant is used so that very large histories can be handled.
  const summarized = await compactMessagesWithChunkedSummary({
    messages,
    model: options.model,
    apiKey: options.apiKey,
    availableTokens: usage.availableTokens,
    targetRatio: options.targetRatio ?? COMPACTION_TARGET_RATIO,
    minKeepMessages: options.minKeepMessages ?? MIN_KEEP_MESSAGES,
    reserveTokens: options.reserveTokens ?? 2048,
    customInstructions: options.customInstructions,
    previousSummary: options.previousSummary,
    signal: options.signal,
    maxChunkTokens: options.maxChunkTokens,
  });
  if (!summarized) {
    return null;
  }

  const { kept, removedCount, tokensRemoved, tokensKept, summary } = summarized;
  return { kept, removedCount, tokensRemoved, tokensKept, summary, reason: "summary" };
}