feat(agent): add context window management with token-aware compaction (#14)

* feat(agent): add context window guard to prevent token overflow

Implement token-aware context management that validates context window
size on agent initialization and provides intelligent message compaction
based on actual token usage rather than simple message count.

Key changes:
- Add context-window module with guard, token estimation, and types
- Support both "count" (legacy) and "tokens" (new default) compaction modes
- Warn when context window < 32K tokens, block when < 16K tokens
- Trigger compaction at 80% utilization, target 50% after compaction

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* feat(agent): add summary-based compaction using LLM

Implement intelligent compaction that uses LLM to generate summaries
of older messages instead of simply truncating them. This preserves
important context like key decisions, TODOs, and technical details.

Key changes:
- Add summarization.ts with compactMessagesWithSummary functions
- Support chunked summarization for very large histories
- Add "summary" compaction mode alongside "count" and "tokens"
- Auto-resolve API key from environment based on provider
- Graceful fallback to "tokens" mode if model/apiKey unavailable

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jiayuan 2026-01-30 03:46:11 +08:00 committed by GitHub
parent 3024e89071
commit 67cd46a072
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 1116 additions and 16 deletions

View file

@ -0,0 +1,110 @@
/**
 * Context Window Guard
 *
 * Resolves the agent's context window size (in tokens) and checks it against
 * the warning and hard-minimum thresholds.
 */
import type { ContextWindowInfo, ContextWindowGuardResult, ContextWindowSource } from "./types.js";
/** Hard minimum token count; a context window below this value blocks the run. */
export const CONTEXT_WINDOW_HARD_MIN_TOKENS = 16_000;
/** Warning threshold; a context window below this value triggers a warning. */
export const CONTEXT_WINDOW_WARN_BELOW_TOKENS = 32_000;
/** Default context window size, used when no size can be obtained otherwise. */
export const DEFAULT_CONTEXT_TOKENS = 200_000;
/**
 * Coerce an arbitrary value to a positive integer.
 *
 * @param value - Candidate value of any type.
 * @returns The value rounded down when it is a finite number whose floor is
 *   greater than zero; `null` for everything else (non-numbers, NaN,
 *   ±Infinity, zero, negatives, and fractions below 1).
 */
function normalizePositiveInt(value: unknown): number | null {
  if (typeof value === "number" && Number.isFinite(value)) {
    const floored = Math.floor(value);
    if (floored > 0) {
      return floored;
    }
  }
  return null;
}
/**
 * Resolve the effective context window size.
 *
 * Priority: model > config > default. Every candidate (including the
 * caller-supplied default) is accepted only when `normalizePositiveInt`
 * yields a positive integer for it; otherwise the next source is tried.
 *
 * @param params - Candidate sizes from each source.
 * @returns The resolved token count together with the source it came from.
 */
export function resolveContextWindowInfo(params: {
  /** The model's `contextWindow` property. */
  modelContextWindow?: number | undefined;
  /** Context tokens specified in the configuration. */
  configContextTokens?: number | undefined;
  /** Fallback value; `DEFAULT_CONTEXT_TOKENS` is used when omitted or invalid. */
  defaultTokens?: number | undefined;
}): ContextWindowInfo {
  // 1. Prefer the size reported by the model.
  const fromModel = normalizePositiveInt(params.modelContextWindow);
  if (fromModel !== null) {
    return { tokens: fromModel, source: "model" };
  }

  // 2. Otherwise use the configured size.
  const fromConfig = normalizePositiveInt(params.configContextTokens);
  if (fromConfig !== null) {
    return { tokens: fromConfig, source: "config" };
  }

  // 3. Fall back to the default. The caller-supplied default is validated as
  //    well: a NaN, zero, or negative value would otherwise be returned as-is,
  //    and evaluateContextWindowGuard never warns or blocks for such values
  //    because its `tokens > 0` precondition fails.
  const fromDefault = normalizePositiveInt(params.defaultTokens);
  return {
    tokens: fromDefault ?? DEFAULT_CONTEXT_TOKENS,
    source: "default",
  };
}
/**
 * Evaluate a resolved context window against the guard thresholds.
 *
 * Both thresholds are rounded down and clamped to at least 1; the token count
 * is rounded down and clamped to at least 0. A token count of 0 never warns
 * or blocks.
 *
 * @param params.info - The resolved context window information.
 * @param params.warnBelowTokens - Warning threshold; defaults to
 *   `CONTEXT_WINDOW_WARN_BELOW_TOKENS`.
 * @param params.hardMinTokens - Blocking threshold; defaults to
 *   `CONTEXT_WINDOW_HARD_MIN_TOKENS`.
 * @returns `info` with the normalized token count plus the `shouldWarn` and
 *   `shouldBlock` flags.
 */
export function evaluateContextWindowGuard(params: {
  info: ContextWindowInfo;
  warnBelowTokens?: number | undefined;
  hardMinTokens?: number | undefined;
}): ContextWindowGuardResult {
  const { info, warnBelowTokens, hardMinTokens } = params;

  // Round down, then enforce a lower bound.
  const floorAtLeast = (minimum: number, value: number): number =>
    Math.max(minimum, Math.floor(value));

  const warnThreshold = floorAtLeast(1, warnBelowTokens ?? CONTEXT_WINDOW_WARN_BELOW_TOKENS);
  const blockThreshold = floorAtLeast(1, hardMinTokens ?? CONTEXT_WINDOW_HARD_MIN_TOKENS);
  const tokens = floorAtLeast(0, info.tokens);
  const hasTokens = tokens > 0;

  return {
    ...info,
    tokens,
    shouldWarn: hasTokens && tokens < warnThreshold,
    shouldBlock: hasTokens && tokens < blockThreshold,
  };
}
/**
 * Convenience wrapper: resolve the context window and evaluate the guard in
 * a single call.
 *
 * @param params - The size candidates forwarded to `resolveContextWindowInfo`
 *   and the thresholds forwarded to `evaluateContextWindowGuard`.
 * @returns The guard result for the resolved context window.
 */
export function checkContextWindow(params: {
  modelContextWindow?: number | undefined;
  configContextTokens?: number | undefined;
  defaultTokens?: number | undefined;
  warnBelowTokens?: number | undefined;
  hardMinTokens?: number | undefined;
}): ContextWindowGuardResult {
  const {
    modelContextWindow,
    configContextTokens,
    defaultTokens,
    warnBelowTokens,
    hardMinTokens,
  } = params;

  return evaluateContextWindowGuard({
    info: resolveContextWindowInfo({ modelContextWindow, configContextTokens, defaultTokens }),
    warnBelowTokens,
    hardMinTokens,
  });
}

View file

@ -0,0 +1,46 @@
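/**
 * Context Window Guard
 *
 * Public entry point: re-exports the guard, token estimation, and
 * summarization APIs together with their types.
 */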
// Types
export type {
ContextWindowSource,
ContextWindowInfo,
ContextWindowGuardResult,
TokenEstimation,
TokenAwareCompactionResult,
} from "./types.js";
// Guard
export {
CONTEXT_WINDOW_HARD_MIN_TOKENS,
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
DEFAULT_CONTEXT_TOKENS,
resolveContextWindowInfo,
evaluateContextWindowGuard,
checkContextWindow,
} from "./guard.js";
// Token estimation
export {
ESTIMATION_SAFETY_MARGIN,
COMPACTION_TRIGGER_RATIO,
COMPACTION_TARGET_RATIO,
MIN_KEEP_MESSAGES,
estimateMessagesTokens,
estimateSystemPromptTokens,
estimateTokenUsage,
shouldCompact,
compactMessagesTokenAware,
isMessageOversized,
} from "./token-estimation.js";
// Summarization
export type { SummaryCompactionResult, SummaryCompactionParams } from "./summarization.js";
export {
splitMessagesForSummary,
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
} from "./summarization.js";

View file

@ -0,0 +1,299 @@
/**
 * Summary-based Compaction
 *
 * Uses an LLM to summarize older messages instead of simply dropping them.
 */
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
import type { Model } from "@mariozechner/pi-ai";
import { estimateMessagesTokens } from "./token-estimation.js";
/** Result of a summary-based compaction. */
export type SummaryCompactionResult = {
/** Messages that were kept (including the summary message). */
kept: AgentMessage[];
/** Number of messages removed. */
removedCount: number;
/** Number of tokens removed. */
tokensRemoved: number;
/** Number of tokens kept. */
tokensKept: number;
/** The generated summary. */
summary: string;
/** Reason for the compaction. */
reason: "summary";
};
/** Parameters for a summary-based compaction. */
export type SummaryCompactionParams = {
/** The message list. */
messages: AgentMessage[];
/** LLM model (used to generate the summary). */
model: Model<any>;
/** API key. */
apiKey: string;
/** Number of available tokens. */
availableTokens: number;
/** Target utilization ratio (0-1); defaults to 0.5. */
targetRatio?: number | undefined;
/** Minimum number of messages to keep; defaults to 10. */
minKeepMessages?: number | undefined;
/** Tokens reserved for summary generation; defaults to 2048. */
reserveTokens?: number | undefined;
/** Custom summarization instructions. */
customInstructions?: string | undefined;
/** The previous summary (used for incremental updates). */
previousSummary?: string | undefined;
/** AbortSignal */
signal?: AbortSignal | undefined;
};
/** Default summarization prompt, used when no custom instructions are supplied. */
const DEFAULT_SUMMARY_INSTRUCTIONS = `Summarize the conversation history concisely, focusing on:
- Key decisions made
- Important context and constraints
- Open questions or TODOs
- Technical details that may be needed later
Keep the summary concise but complete. Use bullet points for clarity.`;
/**
 * Split a message history into an older part to summarize and a recent part
 * to keep verbatim.
 *
 * The kept part is always a contiguous tail of `messages`: walking from the
 * newest message backwards, messages are kept while they fit into the token
 * budget (`availableTokens * targetRatio`) or while fewer than
 * `minKeepMessages` have been kept. The walk stops at the first message that
 * does not fit, so `[...toSummarize, ...toKeep]` always equals `messages`.
 *
 * @param messages - Full message history, oldest first.
 * @param availableTokens - Tokens available for messages.
 * @param options.targetRatio - Fraction of `availableTokens` to aim for; defaults to 0.5.
 * @param options.minKeepMessages - Minimum number of recent messages to keep; defaults to 10.
 * @returns The two partitions, or `null` when there are too few messages, the
 *   history already fits the target, or nothing would be summarized.
 */
export function splitMessagesForSummary(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number | undefined;
    minKeepMessages?: number | undefined;
  },
): { toSummarize: AgentMessage[]; toKeep: AgentMessage[] } | null {
  const targetRatio = options?.targetRatio ?? 0.5;
  const minKeep = options?.minKeepMessages ?? 10;

  if (messages.length <= minKeep) {
    return null; // Too few messages; nothing to compact.
  }

  const totalTokens = estimateMessagesTokens(messages);
  const targetTokens = Math.floor(availableTokens * targetRatio);

  // Already within the target; nothing to compact.
  if (totalTokens <= targetTokens) {
    return null;
  }

  // Index of the oldest kept message; everything before it gets summarized.
  let keepFrom = messages.length;
  let keptTokens = 0;

  for (let i = messages.length - 1; i >= 0; i--) {
    const msgTokens = estimateTokens(messages[i]!);
    const keptCount = messages.length - keepFrom;
    const fits = keptTokens + msgTokens <= targetTokens;

    // Stop at the first message that does not fit once the minimum is met.
    // Skipping it and continuing would leave a hole in the kept history and
    // desynchronize the two partitions.
    if (!fits && keptCount >= minKeep) {
      break;
    }

    keepFrom = i;
    keptTokens += msgTokens;

    // Minimum satisfied and budget exhausted: stop.
    if (keptCount + 1 >= minKeep && keptTokens >= targetTokens) {
      break;
    }
  }

  if (keepFrom === 0) {
    return null; // Everything is kept; nothing to summarize.
  }

  return {
    toSummarize: messages.slice(0, keepFrom),
    toKeep: messages.slice(keepFrom),
  };
}
/**
 * Build the message that carries a compaction summary.
 *
 * @param summary - Summary of the messages being compacted.
 * @param previousSummary - Earlier summary; when non-empty it is placed in
 *   its own section ahead of the new summary.
 * @returns A `user`-role message wrapping the summary text, timestamped with
 *   the current time.
 */
function createSummaryMessage(summary: string, previousSummary?: string): AgentMessage {
  let body: string;
  if (previousSummary) {
    body = `## Previous Context Summary\n${previousSummary}\n\n## Recent Context Summary\n${summary}`;
  } else {
    body = `## Conversation Summary\n${summary}`;
  }

  const wrapped = `[System Note: The following is a summary of the earlier conversation history that has been compacted to save context space.]\n\n${body}\n\n[End of Summary]`;

  return {
    role: "user",
    content: wrapped,
    timestamp: Date.now(),
  };
}
/**
 * Summary-based compaction.
 *
 * Splits the history with `splitMessagesForSummary`, asks the LLM (via
 * `generateSummary`) to summarize the older part, and returns the summary
 * message followed by the kept messages.
 *
 * @param params - See `SummaryCompactionParams`. `reserveTokens` defaults to
 *   2048; an empty or missing `customInstructions` falls back to
 *   `DEFAULT_SUMMARY_INSTRUCTIONS`.
 * @returns The compaction result, or `null` when the split yields nothing to
 *   summarize.
 */
export async function compactMessagesWithSummary(
  params: SummaryCompactionParams,
): Promise<SummaryCompactionResult | null> {
  const { model, apiKey, signal, previousSummary, reserveTokens = 2048 } = params;
  // `||` on purpose: an empty string also selects the default prompt.
  const prompt = params.customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;

  // Decide which messages are summarized and which are kept verbatim.
  const partition = splitMessagesForSummary(params.messages, params.availableTokens, {
    targetRatio: params.targetRatio,
    minKeepMessages: params.minKeepMessages,
  });
  if (!partition) {
    return null;
  }

  // Generate the summary of the older messages.
  const summary = await generateSummary(
    partition.toSummarize,
    model,
    reserveTokens,
    apiKey,
    signal,
    prompt,
    previousSummary,
  );

  // The summary message goes first, followed by the untouched recent messages.
  const kept = [createSummaryMessage(summary, previousSummary), ...partition.toKeep];

  return {
    kept,
    removedCount: partition.toSummarize.length,
    tokensRemoved: estimateMessagesTokens(partition.toSummarize),
    tokensKept: estimateMessagesTokens(kept),
    summary,
    reason: "summary",
  };
}
/**
 * Summary-based compaction for very large histories.
 *
 * When the messages to summarize fit into `maxChunkTokens`, this delegates to
 * `compactMessagesWithSummary`. Otherwise the messages are packed into
 * consecutive chunks and summarized one chunk at a time, feeding each chunk's
 * summary into the next `generateSummary` call as the previous summary.
 *
 * @param params - See `SummaryCompactionParams`; `maxChunkTokens` (default
 *   50000) caps the estimated token size of one chunk and `reserveTokens`
 *   defaults to 2048.
 * @returns The compaction result, or `null` when the split yields nothing to
 *   summarize.
 */
export async function compactMessagesWithChunkedSummary(
  params: SummaryCompactionParams & {
    maxChunkTokens?: number | undefined;
  },
): Promise<SummaryCompactionResult | null> {
  const {
    model,
    apiKey,
    signal,
    previousSummary,
    reserveTokens = 2048,
    maxChunkTokens = 50000,
  } = params;
  // `||` on purpose: an empty string also selects the default prompt.
  const prompt = params.customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;

  // Decide which messages are summarized and which are kept verbatim.
  const partition = splitMessagesForSummary(params.messages, params.availableTokens, {
    targetRatio: params.targetRatio,
    minKeepMessages: params.minKeepMessages,
  });
  if (!partition) {
    return null;
  }
  const { toSummarize, toKeep } = partition;

  // Small enough for a single summarization call.
  if (estimateMessagesTokens(toSummarize) <= maxChunkTokens) {
    return compactMessagesWithSummary(params);
  }

  // Summarize chunk by chunk, carrying the latest summary forward.
  let carriedSummary = previousSummary;
  let lastSummary: string | undefined;
  for (const chunk of chunkMessagesByTokenBudget(toSummarize, maxChunkTokens)) {
    lastSummary = await generateSummary(
      chunk,
      model,
      reserveTokens,
      apiKey,
      signal,
      prompt,
      carriedSummary,
    );
    carriedSummary = lastSummary;
  }

  // The last chunk's summary is used as the final one.
  // NOTE(review): this presumes generateSummary folds the previous summary
  // passed to it into its output — confirm against the library.
  const finalSummary = lastSummary ?? "";

  const kept = [createSummaryMessage(finalSummary), ...toKeep];

  return {
    kept,
    removedCount: toSummarize.length,
    tokensRemoved: estimateMessagesTokens(toSummarize),
    tokensKept: estimateMessagesTokens(kept),
    summary: finalSummary,
    reason: "summary",
  };
}

/**
 * Pack consecutive messages into chunks whose estimated token total stays
 * within `maxChunkTokens`; a single message larger than the limit still gets
 * a chunk of its own.
 */
function chunkMessagesByTokenBudget(
  messages: AgentMessage[],
  maxChunkTokens: number,
): AgentMessage[][] {
  const chunks: AgentMessage[][] = [];
  let pending: AgentMessage[] = [];
  let pendingTokens = 0;

  for (const message of messages) {
    const cost = estimateTokens(message);
    // Close the current chunk before it would overflow (never emit an empty chunk).
    if (pendingTokens + cost > maxChunkTokens && pending.length > 0) {
      chunks.push(pending);
      pending = [];
      pendingTokens = 0;
    }
    pending.push(message);
    pendingTokens += cost;
  }

  if (pending.length > 0) {
    chunks.push(pending);
  }
  return chunks;
}

View file

@ -0,0 +1,156 @@
/**
 * Token Estimation
 *
 * Estimates token usage and performs token-aware compaction of messages.
 */
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { estimateTokens } from "@mariozechner/pi-coding-agent";
import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js";
/** Safety margin factor that compensates for inaccurate estimates. */
export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer
/** Utilization ratio at which compaction is triggered. */
export const COMPACTION_TRIGGER_RATIO = 0.8; // 80%
/** Target utilization ratio after compaction. */
export const COMPACTION_TARGET_RATIO = 0.5; // 50%
/** Minimum number of messages to keep. */
export const MIN_KEEP_MESSAGES = 10;
/**
 * Estimate the total token count of a list of messages.
 *
 * @param messages - Messages to measure.
 * @returns The sum of `estimateTokens` over all messages (0 for an empty list).
 */
export function estimateMessagesTokens(messages: AgentMessage[]): number {
  let total = 0;
  messages.forEach((message) => {
    total += estimateTokens(message);
  });
  return total;
}
/**
 * Estimate the token count of the system prompt.
 *
 * Rough heuristic: about 4 characters per token for English/code mixed text
 * and about 2 for Chinese, so the average of 3 characters per token is used.
 *
 * @param systemPrompt - The system prompt, if any.
 * @returns The estimated token count; 0 for a missing or empty prompt.
 */
export function estimateSystemPromptTokens(systemPrompt: string | undefined): number {
  const charCount = systemPrompt?.length ?? 0;
  return charCount === 0 ? 0 : Math.ceil(charCount / 3);
}
/**
 * Estimate how much of the context window the current messages use.
 *
 * @param params.messages - Conversation messages.
 * @param params.systemPrompt - Optional system prompt.
 * @param params.contextWindowTokens - Total context window size in tokens.
 * @param params.reserveTokens - Tokens reserved for response generation;
 *   defaults to 1024.
 * @returns The message and system prompt estimates, the tokens available for
 *   messages, and the utilization ratio (with the safety margin applied; 1
 *   when no tokens are available).
 */
export function estimateTokenUsage(params: {
  messages: AgentMessage[];
  systemPrompt?: string | undefined;
  contextWindowTokens: number;
  reserveTokens?: number | undefined;
}): TokenEstimation {
  const { messages, systemPrompt, contextWindowTokens, reserveTokens } = params;

  const messageTokens = estimateMessagesTokens(messages);
  const systemPromptTokens = estimateSystemPromptTokens(systemPrompt);
  const reserve = reserveTokens ?? 1024;

  // Available = total window - system prompt - reserve, never below zero.
  const availableTokens = Math.max(0, contextWindowTokens - systemPromptTokens - reserve);

  // Utilization includes the safety margin; with nothing available it is 1.
  let utilizationRatio = 1;
  if (availableTokens > 0) {
    utilizationRatio = (messageTokens * ESTIMATION_SAFETY_MARGIN) / availableTokens;
  }

  return { messageTokens, systemPromptTokens, availableTokens, utilizationRatio };
}
/**
 * Decide whether compaction is needed.
 *
 * @param estimation - A token usage estimate.
 * @returns `true` when the utilization ratio has reached
 *   `COMPACTION_TRIGGER_RATIO`.
 */
export function shouldCompact(estimation: TokenEstimation): boolean {
  const { utilizationRatio } = estimation;
  return utilizationRatio >= COMPACTION_TRIGGER_RATIO;
}
/**
 * Token-aware compaction: drop the oldest messages until the remainder fits
 * the token target.
 *
 * The kept messages are always a contiguous tail of `messages`. Walking from
 * the newest message backwards, messages are kept while they fit into the
 * budget (`availableTokens * targetRatio`) or while fewer than
 * `minKeepMessages` have been kept; the walk stops at the first message that
 * does not fit, so the kept history never has holes.
 *
 * @param messages - Full message history, oldest first.
 * @param availableTokens - Tokens available for messages.
 * @param options.targetRatio - Fraction of `availableTokens` to aim for;
 *   defaults to `COMPACTION_TARGET_RATIO`.
 * @param options.minKeepMessages - Minimum number of recent messages to keep;
 *   defaults to `MIN_KEEP_MESSAGES`.
 * @returns The kept messages with removal statistics, or `null` when there
 *   are too few messages, the history already fits the target, or nothing
 *   would be removed.
 */
export function compactMessagesTokenAware(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number;
    minKeepMessages?: number;
  },
): TokenAwareCompactionResult | null {
  const targetRatio = options?.targetRatio ?? COMPACTION_TARGET_RATIO;
  const minKeep = options?.minKeepMessages ?? MIN_KEEP_MESSAGES;

  if (messages.length <= minKeep) {
    return null; // Too few messages; do not compact.
  }

  const currentTokens = estimateMessagesTokens(messages);
  const targetTokens = Math.floor(availableTokens * targetRatio);

  // Already within the target; nothing to compact.
  if (currentTokens <= targetTokens) {
    return null;
  }

  // Index of the oldest kept message; everything before it is removed.
  let keepFrom = messages.length;
  let keptTokens = 0;

  for (let i = messages.length - 1; i >= 0; i--) {
    const msgTokens = estimateTokens(messages[i]!);
    const keptCount = messages.length - keepFrom;
    const fits = keptTokens + msgTokens <= targetTokens;

    // Stop at the first message that does not fit once the minimum is met;
    // skipping it and keeping older ones would leave a hole in the history.
    if (!fits && keptCount >= minKeep) {
      break;
    }

    keepFrom = i;
    keptTokens += msgTokens;

    // Minimum satisfied and budget exhausted: stop.
    if (keptCount + 1 >= minKeep && keptTokens >= targetTokens) {
      break;
    }
  }

  // Every message was kept, so nothing was compacted.
  if (keepFrom === 0) {
    return null;
  }

  return {
    kept: messages.slice(keepFrom),
    removedCount: keepFrom,
    tokensRemoved: currentTokens - keptTokens,
    tokensKept: keptTokens,
  };
}
/**
 * Check whether a single message is too large for the context window.
 *
 * @param message - The message to check.
 * @param contextWindowTokens - Total context window size in tokens.
 * @param maxRatio - Largest share of the context window one message may
 *   occupy; defaults to 0.5.
 * @returns `true` when the message's estimated tokens, multiplied by
 *   `ESTIMATION_SAFETY_MARGIN`, exceed `contextWindowTokens * maxRatio`.
 */
export function isMessageOversized(
  message: AgentMessage,
  contextWindowTokens: number,
  maxRatio: number = 0.5,
): boolean {
  const limit = contextWindowTokens * maxRatio;
  const paddedEstimate = estimateTokens(message) * ESTIMATION_SAFETY_MARGIN;
  return paddedEstimate > limit;
}

View file

@ -0,0 +1,48 @@
/**
 * Context Window Guard - type definitions
 *
 * Shared types for the LLM context window guard and token-aware compaction.
 */
/** Where the context window size came from. */
export type ContextWindowSource = "model" | "config" | "default";
/** Context window information. */
export type ContextWindowInfo = {
/** Number of tokens. */
tokens: number;
/** Source of the value. */
source: ContextWindowSource;
};
/** Result of the context window guard check. */
export type ContextWindowGuardResult = ContextWindowInfo & {
/** Whether a warning should be issued (the window is small). */
shouldWarn: boolean;
/** Whether the run should be blocked (the window is too small). */
shouldBlock: boolean;
};
/** Result of a token usage estimate. */
export type TokenEstimation = {
/** Total tokens across all messages. */
messageTokens: number;
/** Tokens used by the system prompt. */
systemPromptTokens: number;
/** Number of available tokens. */
availableTokens: number;
/**
* Utilization ratio (nominally 0-1).
* NOTE(review): as computed by estimateTokenUsage (message tokens times the
* safety margin, divided by available tokens) this can exceed 1.
*/
utilizationRatio: number;
};
/** Compaction result (with token information). */
export type TokenAwareCompactionResult = {
/** Messages that were kept. */
kept: import("@mariozechner/pi-agent-core").AgentMessage[];
/** Number of messages removed. */
removedCount: number;
/** Number of tokens removed. */
tokensRemoved: number;
/** Number of tokens kept. */
tokensKept: number;
};