feat(agent): add context window management with token-aware compaction (#14)

* feat(agent): add context window guard to prevent token overflow

Implement token-aware context management that validates context window
size on agent initialization and provides intelligent message compaction
based on actual token usage rather than simple message count.

Key changes:
- Add context-window module with guard, token estimation, and types
- Support both "count" (legacy) and "tokens" (new default) compaction modes
- Warn when context window < 32K tokens, block when < 16K tokens
- Trigger compaction at 80% utilization, target 50% after compaction

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* feat(agent): add summary-based compaction using LLM

Implement intelligent compaction that uses LLM to generate summaries
of older messages instead of simply truncating them. This preserves
important context like key decisions, TODOs, and technical details.

Key changes:
- Add summarization.ts with compactMessagesWithSummary functions
- Support chunked summarization for very large histories
- Add "summary" compaction mode alongside "count" and "tokens"
- Auto-resolve API key from environment based on provider
- Graceful fallback to "tokens" mode if model/apiKey unavailable

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jiayuan 2026-01-30 03:46:11 +08:00 committed by GitHub
parent 3024e89071
commit 67cd46a072
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 1116 additions and 16 deletions

View file

@ -0,0 +1,110 @@
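/**
 * Context Window Guard - validation
 *
 * Resolves the agent's context window size in tokens and decides whether it is
 * large enough to run (warn when small, block when too small).
 */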
import type { ContextWindowInfo, ContextWindowGuardResult, ContextWindowSource } from "./types.js";
/** Hard minimum context window, in tokens; windows smaller than this block the run. */
export const CONTEXT_WINDOW_HARD_MIN_TOKENS = 16_000;
/** Warning threshold, in tokens; windows smaller than this produce a warning. */
export const CONTEXT_WINDOW_WARN_BELOW_TOKENS = 32_000;
/** Context window assumed when no usable value can be obtained from any source. */
export const DEFAULT_CONTEXT_TOKENS = 200_000;

/**
 * Coerces a value to a positive integer.
 *
 * @param value - Candidate value of any type.
 * @returns The value floored to an integer, or `null` when it is not a finite
 *   number or floors to zero or less.
 */
function normalizePositiveInt(value: unknown): number | null {
  if (typeof value !== "number" || !Number.isFinite(value)) return null;
  const int = Math.floor(value);
  return int > 0 ? int : null;
}

/**
 * Resolves the context window size and records which source supplied it.
 *
 * Priority: model > config > default. A candidate is used only when it is a
 * finite number that floors to a positive integer; otherwise the next source
 * is tried.
 *
 * The caller-supplied default is validated like the other sources. Previously
 * a NaN, zero or negative `defaultTokens` was returned as-is, which produced a
 * window that the guard neither warned about nor blocked.
 *
 * @param params.modelContextWindow - The model's `contextWindow` value.
 * @param params.configContextTokens - Context tokens specified in configuration.
 * @param params.defaultTokens - Fallback value; `DEFAULT_CONTEXT_TOKENS` is used
 *   when this is missing or invalid.
 * @returns The resolved token count together with its source.
 */
export function resolveContextWindowInfo(params: {
  /** The model's contextWindow property. */
  modelContextWindow?: number | undefined;
  /** Context tokens specified in configuration. */
  configContextTokens?: number | undefined;
  /** Fallback value. */
  defaultTokens?: number | undefined;
}): ContextWindowInfo {
  // 1. Prefer the value reported by the model.
  const fromModel = normalizePositiveInt(params.modelContextWindow);
  if (fromModel !== null) {
    return { tokens: fromModel, source: "model" };
  }

  // 2. Fall back to the configured value.
  const fromConfig = normalizePositiveInt(params.configContextTokens);
  if (fromConfig !== null) {
    return { tokens: fromConfig, source: "config" };
  }

  // 3. Use the default, ignoring an unusable caller-supplied default.
  const fallback = normalizePositiveInt(params.defaultTokens) ?? DEFAULT_CONTEXT_TOKENS;
  return { tokens: fallback, source: "default" };
}
/**
 * Evaluates a resolved context window against the warning and blocking thresholds.
 *
 * Both thresholds are floored and clamped to at least 1; the token count is
 * floored and clamped to at least 0. A token count of 0 never warns or blocks.
 *
 * @param params.info - Resolved context window information.
 * @param params.warnBelowTokens - Warning threshold; defaults to `CONTEXT_WINDOW_WARN_BELOW_TOKENS`.
 * @param params.hardMinTokens - Blocking threshold; defaults to `CONTEXT_WINDOW_HARD_MIN_TOKENS`.
 * @returns The input info with a normalized `tokens` plus `shouldWarn` / `shouldBlock` flags.
 */
export function evaluateContextWindowGuard(params: {
  info: ContextWindowInfo;
  warnBelowTokens?: number | undefined;
  hardMinTokens?: number | undefined;
}): ContextWindowGuardResult {
  const { info, warnBelowTokens, hardMinTokens } = params;

  // Floor a raw threshold and never let it drop below 1.
  const toThreshold = (raw: number): number => Math.max(1, Math.floor(raw));

  const warnThreshold = toThreshold(warnBelowTokens ?? CONTEXT_WINDOW_WARN_BELOW_TOKENS);
  const blockThreshold = toThreshold(hardMinTokens ?? CONTEXT_WINDOW_HARD_MIN_TOKENS);

  const tokens = Math.max(0, Math.floor(info.tokens));
  const hasTokens = tokens > 0;

  return {
    ...info,
    tokens,
    shouldWarn: hasTokens && tokens < warnThreshold,
    shouldBlock: hasTokens && tokens < blockThreshold,
  };
}
/**
 * One-step context window check: resolves the window size, then evaluates it
 * against the guard thresholds.
 *
 * @param params - The size sources accepted by `resolveContextWindowInfo` and the
 *   thresholds accepted by `evaluateContextWindowGuard`.
 * @returns The guard result for the resolved context window.
 */
export function checkContextWindow(params: {
  modelContextWindow?: number | undefined;
  configContextTokens?: number | undefined;
  defaultTokens?: number | undefined;
  warnBelowTokens?: number | undefined;
  hardMinTokens?: number | undefined;
}): ContextWindowGuardResult {
  const {
    modelContextWindow,
    configContextTokens,
    defaultTokens,
    warnBelowTokens,
    hardMinTokens,
  } = params;

  return evaluateContextWindowGuard({
    info: resolveContextWindowInfo({ modelContextWindow, configContextTokens, defaultTokens }),
    warnBelowTokens,
    hardMinTokens,
  });
}

View file

@ -0,0 +1,46 @@
/**
 * Context Window Guard
 *
 * Public entry point: re-exports the guard, token-estimation and summarization APIs.
 */
// Types
export type {
ContextWindowSource,
ContextWindowInfo,
ContextWindowGuardResult,
TokenEstimation,
TokenAwareCompactionResult,
} from "./types.js";
// Guard
export {
CONTEXT_WINDOW_HARD_MIN_TOKENS,
CONTEXT_WINDOW_WARN_BELOW_TOKENS,
DEFAULT_CONTEXT_TOKENS,
resolveContextWindowInfo,
evaluateContextWindowGuard,
checkContextWindow,
} from "./guard.js";
// Token estimation
export {
ESTIMATION_SAFETY_MARGIN,
COMPACTION_TRIGGER_RATIO,
COMPACTION_TARGET_RATIO,
MIN_KEEP_MESSAGES,
estimateMessagesTokens,
estimateSystemPromptTokens,
estimateTokenUsage,
shouldCompact,
compactMessagesTokenAware,
isMessageOversized,
} from "./token-estimation.js";
// Summarization
export type { SummaryCompactionResult, SummaryCompactionParams } from "./summarization.js";
export {
splitMessagesForSummary,
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
} from "./summarization.js";

View file

@ -0,0 +1,299 @@
/**
 * Summary-based compaction
 *
 * Uses an LLM to summarize older messages instead of simply dropping them.
 */
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { generateSummary, estimateTokens } from "@mariozechner/pi-coding-agent";
import type { Model } from "@mariozechner/pi-ai";
import { estimateMessagesTokens } from "./token-estimation.js";
/** Result of a summary-based compaction. */
export type SummaryCompactionResult = {
/** Messages that were kept (including the generated summary message). */
kept: AgentMessage[];
/** Number of messages removed. */
removedCount: number;
/** Estimated number of tokens removed. */
tokensRemoved: number;
/** Estimated number of tokens kept. */
tokensKept: number;
/** The generated summary text. */
summary: string;
/** Compaction reason; always "summary" for this result type. */
reason: "summary";
};
/** Parameters for summary-based compaction. */
export type SummaryCompactionParams = {
/** The message list to compact. */
messages: AgentMessage[];
/** LLM model used to generate the summary. */
model: Model<any>;
/** API key. */
apiKey: string;
/** Number of tokens available for messages. */
availableTokens: number;
/** Target utilization ratio (0-1); defaults to 0.5. */
targetRatio?: number | undefined;
/** Minimum number of messages to keep; defaults to 10. */
minKeepMessages?: number | undefined;
/** Tokens reserved for summary generation; defaults to 2048. */
reserveTokens?: number | undefined;
/** Custom summarization instructions. */
customInstructions?: string | undefined;
/** Previous summary (used for incremental updates). */
previousSummary?: string | undefined;
/** AbortSignal forwarded to summary generation. */
signal?: AbortSignal | undefined;
};
/** Default summarization instructions, used when the caller supplies no custom instructions. */
const DEFAULT_SUMMARY_INSTRUCTIONS = `Summarize the conversation history concisely, focusing on:
- Key decisions made
- Important context and constraints
- Open questions or TODOs
- Technical details that may be needed later
Keep the summary concise but complete. Use bullet points for clarity.`;
/**
 * Splits a history into an older prefix to summarize and a recent suffix to keep verbatim.
 *
 * Walks backwards from the newest message and keeps messages while they fit in
 * the budget `floor(availableTokens * targetRatio)`. The newest `minKeepMessages`
 * messages are always kept, even when they exceed the budget.
 *
 * The walk stops at the first message that does not fit, so `toKeep` is always a
 * contiguous suffix and `toSummarize` is exactly the remaining prefix. Previously
 * an oversized message was skipped while older, smaller ones were still kept;
 * the prefix slice then duplicated some messages and silently dropped others.
 *
 * @param messages - Full message history, oldest first.
 * @param availableTokens - Tokens available for messages.
 * @param options.targetRatio - Fraction of `availableTokens` to keep; defaults to 0.5.
 * @param options.minKeepMessages - Minimum number of newest messages to keep; defaults to 10.
 * @returns The split, or `null` when there are at most `minKeepMessages` messages,
 *   the history already fits the budget, or nothing would be summarized.
 */
export function splitMessagesForSummary(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number | undefined;
    minKeepMessages?: number | undefined;
  },
): { toSummarize: AgentMessage[]; toKeep: AgentMessage[] } | null {
  const targetRatio = options?.targetRatio ?? 0.5;
  const minKeep = options?.minKeepMessages ?? 10;

  if (messages.length <= minKeep) {
    return null; // Too few messages; nothing to compact.
  }

  const totalTokens = estimateMessagesTokens(messages);
  const targetTokens = Math.floor(availableTokens * targetRatio);

  // Already within the target; no compaction needed.
  if (totalTokens <= targetTokens) {
    return null;
  }

  // Index of the oldest kept message; everything before it gets summarized.
  let keepFrom = messages.length;
  let keptTokens = 0;

  for (let i = messages.length - 1; i >= 0; i--) {
    const msgTokens = estimateTokens(messages[i]!);
    const keptCount = messages.length - keepFrom;

    // Once the minimum is satisfied, stop at the first message that would
    // exceed the budget so the kept range stays contiguous.
    if (keptCount >= minKeep && keptTokens + msgTokens > targetTokens) {
      break;
    }

    keepFrom = i;
    keptTokens += msgTokens;
  }

  if (keepFrom === 0) {
    return null; // Everything was kept; nothing to summarize.
  }

  return {
    toSummarize: messages.slice(0, keepFrom),
    toKeep: messages.slice(keepFrom),
  };
}
/**
 * Builds the user-role message that carries a conversation summary.
 *
 * @param summary - Summary text to embed.
 * @param previousSummary - When non-empty, it is embedded ahead of `summary`
 *   under its own heading.
 * @returns A message with role "user", the wrapped summary as content, and the
 *   current time as timestamp.
 */
function createSummaryMessage(summary: string, previousSummary?: string): AgentMessage {
  let body: string;
  if (previousSummary) {
    body = `## Previous Context Summary\n${previousSummary}\n\n## Recent Context Summary\n${summary}`;
  } else {
    body = `## Conversation Summary\n${summary}`;
  }

  const wrapped = `[System Note: The following is a summary of the earlier conversation history that has been compacted to save context space.]\n\n${body}\n\n[End of Summary]`;

  return { role: "user", content: wrapped, timestamp: Date.now() };
}
/**
 * Compacts a history by replacing its older messages with an LLM-generated summary.
 *
 * The history is split with `splitMessagesForSummary`; the older part is sent to
 * `generateSummary` and the result is placed, as a single summary message, in
 * front of the kept messages.
 *
 * `previousSummary` is passed to `generateSummary` for an incremental update, so
 * the returned summary is treated as already covering it and it is not prepended
 * to the summary message a second time. This matches
 * `compactMessagesWithChunkedSummary`, which also emits only the final summary,
 * and avoids spending context tokens on duplicated text.
 * NOTE(review): relies on `generateSummary` merging `previousSummary` into its
 * output — confirm against the pi-coding-agent implementation.
 *
 * Errors from `generateSummary` are not caught here and propagate to the caller.
 *
 * @param params - See `SummaryCompactionParams`; `reserveTokens` defaults to 2048.
 * @returns The compaction result, or `null` when the split finds nothing to summarize.
 */
export async function compactMessagesWithSummary(
  params: SummaryCompactionParams,
): Promise<SummaryCompactionResult | null> {
  const {
    messages,
    model,
    apiKey,
    availableTokens,
    targetRatio,
    minKeepMessages,
    reserveTokens = 2048,
    customInstructions,
    previousSummary,
    signal,
  } = params;

  // Decide which messages are summarized and which are kept verbatim.
  const split = splitMessagesForSummary(messages, availableTokens, {
    targetRatio,
    minKeepMessages,
  });
  if (!split) {
    return null;
  }
  const { toSummarize, toKeep } = split;

  // Generate the summary; an empty custom instruction falls back to the default.
  const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
  const summary = await generateSummary(
    toSummarize,
    model,
    reserveTokens,
    apiKey,
    signal,
    instructions,
    previousSummary,
  );

  // The previous summary was already given to generateSummary above, so only
  // the updated summary is embedded in the message.
  const summaryMessage = createSummaryMessage(summary);

  const kept = [summaryMessage, ...toKeep];

  return {
    kept,
    removedCount: toSummarize.length,
    tokensRemoved: estimateMessagesTokens(toSummarize),
    tokensKept: estimateMessagesTokens(kept),
    summary,
    reason: "summary",
  };
}
/**
 * Summary compaction for very large histories.
 *
 * When the messages to summarize fit within `maxChunkTokens`, this delegates to
 * `compactMessagesWithSummary`. Otherwise the messages are grouped into
 * consecutive chunks and summarized one chunk at a time, each call receiving the
 * previous chunk's summary as context; the last chunk's summary becomes the
 * final summary.
 *
 * @param params - See `SummaryCompactionParams`; `maxChunkTokens` defaults to
 *   50000 and `reserveTokens` to 2048.
 * @returns The compaction result, or `null` when the split finds nothing to summarize.
 */
export async function compactMessagesWithChunkedSummary(
  params: SummaryCompactionParams & {
    maxChunkTokens?: number | undefined;
  },
): Promise<SummaryCompactionResult | null> {
  const {
    messages,
    model,
    apiKey,
    availableTokens,
    targetRatio,
    minKeepMessages,
    reserveTokens = 2048,
    customInstructions,
    previousSummary,
    signal,
    maxChunkTokens = 50000,
  } = params;

  const split = splitMessagesForSummary(messages, availableTokens, {
    targetRatio,
    minKeepMessages,
  });
  if (!split) {
    return null;
  }
  const { toSummarize, toKeep } = split;

  // Small enough for a single summarization call.
  if (estimateMessagesTokens(toSummarize) <= maxChunkTokens) {
    return compactMessagesWithSummary(params);
  }

  // Group consecutive messages into chunks of at most maxChunkTokens; a single
  // message larger than the limit gets a chunk of its own.
  const chunks: AgentMessage[][] = [];
  let pending: AgentMessage[] = [];
  let pendingTokens = 0;
  for (const message of toSummarize) {
    const cost = estimateTokens(message);
    const overflows = pendingTokens + cost > maxChunkTokens;
    if (overflows && pending.length > 0) {
      chunks.push(pending);
      pending = [];
      pendingTokens = 0;
    }
    pending.push(message);
    pendingTokens += cost;
  }
  if (pending.length > 0) {
    chunks.push(pending);
  }

  // Summarize chunk by chunk, feeding each summary into the next call.
  const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
  let rollingSummary = previousSummary;
  let lastSummary: string | undefined;
  for (const chunk of chunks) {
    lastSummary = await generateSummary(
      chunk,
      model,
      reserveTokens,
      apiKey,
      signal,
      instructions,
      rollingSummary,
    );
    rollingSummary = lastSummary;
  }

  // The last chunk's summary already carries the earlier context.
  const finalSummary = lastSummary ?? "";
  const kept = [createSummaryMessage(finalSummary), ...toKeep];
  const tokensRemoved = estimateMessagesTokens(toSummarize);
  const tokensKept = estimateMessagesTokens(kept);

  return {
    kept,
    removedCount: toSummarize.length,
    tokensRemoved,
    tokensKept,
    summary: finalSummary,
    reason: "summary",
  };
}

View file

@ -0,0 +1,156 @@
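/**
 * Token estimation
 *
 * Estimates token usage for messages and the system prompt, and provides
 * token-aware message compaction.
 */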
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import { estimateTokens } from "@mariozechner/pi-coding-agent";
import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js";
/** Safety-margin multiplier that compensates for inaccurate token estimates. */
export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer
/** Utilization ratio at or above which compaction is triggered. */
export const COMPACTION_TRIGGER_RATIO = 0.8; // 80%
/** Target utilization ratio after compaction. */
export const COMPACTION_TARGET_RATIO = 0.5; // 50%
/** Minimum number of messages to keep. */
export const MIN_KEEP_MESSAGES = 10;
/**
 * Estimates the total token count of a list of messages.
 *
 * @param messages - Messages to measure.
 * @returns The sum of `estimateTokens` over all messages; 0 for an empty list.
 */
export function estimateMessagesTokens(messages: AgentMessage[]): number {
  let total = 0;
  messages.forEach((message) => {
    total += estimateTokens(message);
  });
  return total;
}
/**
 * Estimates the token count of a system prompt.
 *
 * Rough heuristic: about 4 characters per token for English/code, about 2 for
 * Chinese, so an average of 3 characters per token is used.
 *
 * @param systemPrompt - Prompt text; may be undefined.
 * @returns `ceil(length / 3)`, or 0 when the prompt is missing or empty.
 */
export function estimateSystemPromptTokens(systemPrompt: string | undefined): number {
  const charsPerToken = 3;
  return systemPrompt ? Math.ceil(systemPrompt.length / charsPerToken) : 0;
}
/**
 * Estimates how much of the context window the current messages occupy.
 *
 * Available tokens = context window - system prompt tokens - reserve (clamped
 * to 0). The utilization ratio applies `ESTIMATION_SAFETY_MARGIN` to the message
 * tokens, and is reported as 1 when no tokens are available.
 *
 * @param params.messages - Messages to measure.
 * @param params.systemPrompt - System prompt, counted against the window.
 * @param params.contextWindowTokens - Total context window size.
 * @param params.reserveTokens - Tokens reserved for the response; defaults to 1024.
 * @returns Message tokens, system prompt tokens, available tokens and utilization ratio.
 */
export function estimateTokenUsage(params: {
  messages: AgentMessage[];
  systemPrompt?: string | undefined;
  contextWindowTokens: number;
  reserveTokens?: number | undefined;
}): TokenEstimation {
  const messageTokens = estimateMessagesTokens(params.messages);
  const systemPromptTokens = estimateSystemPromptTokens(params.systemPrompt);
  const responseReserve = params.reserveTokens ?? 1024;

  // What is left for messages once the prompt and the reserve are subtracted.
  const budget = params.contextWindowTokens - systemPromptTokens - responseReserve;
  const availableTokens = Math.max(0, budget);

  // Pad the estimate before computing utilization; no budget counts as full.
  let utilizationRatio = 1;
  if (availableTokens > 0) {
    utilizationRatio = (messageTokens * ESTIMATION_SAFETY_MARGIN) / availableTokens;
  }

  return { messageTokens, systemPromptTokens, availableTokens, utilizationRatio };
}
/**
 * Decides whether compaction is needed.
 *
 * @param estimation - Result of `estimateTokenUsage`.
 * @returns `true` when the utilization ratio is at or above `COMPACTION_TRIGGER_RATIO`.
 */
export function shouldCompact(estimation: TokenEstimation): boolean {
  const { utilizationRatio } = estimation;
  return utilizationRatio >= COMPACTION_TRIGGER_RATIO;
}
/**
 * Token-aware compaction: keeps the newest messages that fit a token budget and
 * drops the rest.
 *
 * Walks backwards from the newest message and keeps messages while they fit in
 * `floor(availableTokens * targetRatio)`. The newest `minKeepMessages` messages
 * are always kept, even when they exceed the budget.
 *
 * The walk stops at the first message that does not fit, so the kept messages
 * are always a contiguous suffix of the history. Previously an oversized message
 * was skipped while older, smaller ones were still kept, leaving gaps in the
 * middle of the conversation.
 *
 * @param messages - Full message history, oldest first.
 * @param availableTokens - Tokens available for messages.
 * @param options.targetRatio - Fraction of `availableTokens` to keep; defaults to
 *   `COMPACTION_TARGET_RATIO`.
 * @param options.minKeepMessages - Minimum number of newest messages to keep;
 *   defaults to `MIN_KEEP_MESSAGES`.
 * @returns The compaction result, or `null` when there are at most
 *   `minKeepMessages` messages, the history already fits, or nothing is removed.
 */
export function compactMessagesTokenAware(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number;
    minKeepMessages?: number;
  },
): TokenAwareCompactionResult | null {
  const targetRatio = options?.targetRatio ?? COMPACTION_TARGET_RATIO;
  const minKeep = options?.minKeepMessages ?? MIN_KEEP_MESSAGES;

  if (messages.length <= minKeep) {
    return null; // Too few messages; do not compact.
  }

  const currentTokens = estimateMessagesTokens(messages);
  const targetTokens = Math.floor(availableTokens * targetRatio);

  // Already within the target; no compaction needed.
  if (currentTokens <= targetTokens) {
    return null;
  }

  // Index of the oldest kept message; everything before it is removed.
  let keepFrom = messages.length;
  let keptTokens = 0;

  for (let i = messages.length - 1; i >= 0; i--) {
    const msgTokens = estimateTokens(messages[i]!);
    const keptCount = messages.length - keepFrom;

    // Once the minimum is satisfied, stop at the first message that would
    // exceed the budget so the kept range stays contiguous.
    if (keptCount >= minKeep && keptTokens + msgTokens > targetTokens) {
      break;
    }

    keepFrom = i;
    keptTokens += msgTokens;
  }

  // Everything was kept, so nothing was compacted.
  if (keepFrom === 0) {
    return null;
  }

  return {
    kept: messages.slice(keepFrom),
    removedCount: keepFrom,
    tokensRemoved: currentTokens - keptTokens,
    tokensKept: keptTokens,
  };
}
/**
 * Checks whether a single message is too large for the context window.
 *
 * The message's estimated tokens are padded with `ESTIMATION_SAFETY_MARGIN` and
 * compared against `contextWindowTokens * maxRatio`.
 *
 * @param message - Message to measure.
 * @param contextWindowTokens - Total context window size.
 * @param maxRatio - Largest share of the window one message may take; defaults to 0.5.
 * @returns `true` when the padded estimate exceeds the limit.
 */
export function isMessageOversized(
  message: AgentMessage,
  contextWindowTokens: number,
  maxRatio: number = 0.5,
): boolean {
  const limit = contextWindowTokens * maxRatio;
  const paddedEstimate = estimateTokens(message) * ESTIMATION_SAFETY_MARGIN;
  return paddedEstimate > limit;
}

View file

@ -0,0 +1,48 @@
/**
 * Context Window Guard - type definitions
 *
 * Types describing the LLM context window, token estimates and compaction results.
 */
/** Where the context window value came from. */
export type ContextWindowSource = "model" | "config" | "default";
/** Context window information. */
export type ContextWindowInfo = {
/** Number of tokens. */
tokens: number;
/** Source of the value. */
source: ContextWindowSource;
};
/** Result of validating a context window with the guard. */
export type ContextWindowGuardResult = ContextWindowInfo & {
/** Whether a warning should be issued (the window is small). */
shouldWarn: boolean;
/** Whether the run should be blocked (the window is too small). */
shouldBlock: boolean;
};
/** Result of a token usage estimate. */
export type TokenEstimation = {
/** Total tokens of all messages. */
messageTokens: number;
/** Tokens of the system prompt. */
systemPromptTokens: number;
/** Tokens available for messages. */
availableTokens: number;
/**
* Utilization ratio (nominally 0-1). As computed by `estimateTokenUsage` it
* includes the estimation safety margin, so it can exceed 1.
*/
utilizationRatio: number;
};
/** Compaction result (with token information). */
export type TokenAwareCompactionResult = {
/** Messages that were kept. */
kept: import("@mariozechner/pi-agent-core").AgentMessage[];
/** Number of messages removed. */
removedCount: number;
/** Number of tokens removed. */
tokensRemoved: number;
/** Number of tokens kept. */
tokensKept: number;
};

View file

@ -1,3 +1,4 @@
export * from "./runner.js";
export * from "./types.js";
export * from "./profile/index.js";
export * from "./context-window/index.js";

View file

@ -5,12 +5,45 @@ import { createAgentOutput } from "./output.js";
import { resolveModel, resolveTools } from "./tools.js";
import { SessionManager } from "./session/session-manager.js";
import { ProfileManager } from "./profile/index.js";
import {
checkContextWindow,
DEFAULT_CONTEXT_TOKENS,
type ContextWindowGuardResult,
} from "./context-window/index.js";
/**
 * Environment variable holding the API key for each known provider.
 *
 * A Map is used instead of a plain object so that provider names such as
 * "constructor" or "toString" cannot match inherited `Object.prototype` members.
 */
const PROVIDER_API_KEY_ENV_VARS: ReadonlyMap<string, string> = new Map([
  ["openai", "OPENAI_API_KEY"],
  ["anthropic", "ANTHROPIC_API_KEY"],
  ["google", "GOOGLE_API_KEY"],
  ["google-genai", "GOOGLE_API_KEY"],
  ["kimi", "MOONSHOT_API_KEY"],
  ["kimi-coding", "MOONSHOT_API_KEY"],
  ["deepseek", "DEEPSEEK_API_KEY"],
  ["groq", "GROQ_API_KEY"],
  ["mistral", "MISTRAL_API_KEY"],
  ["together", "TOGETHER_API_KEY"],
]);

/**
 * Resolves a provider's API key from the environment.
 *
 * Known providers use their mapped variable. Any other provider falls back to
 * the generic form `<PROVIDER>_API_KEY`, with the name upper-cased and dashes
 * replaced by underscores.
 *
 * @param provider - Provider identifier, e.g. "openai" or "kimi-coding".
 * @returns The environment variable's value, or `undefined` when it is not set.
 */
function resolveApiKey(provider: string): string | undefined {
  const envVar =
    PROVIDER_API_KEY_ENV_VARS.get(provider) ??
    `${provider.toUpperCase().replace(/-/g, "_")}_API_KEY`;
  return process.env[envVar];
}
export class Agent {
private readonly agent: PiAgentCore;
private readonly output;
private readonly session: SessionManager;
private readonly profile?: ProfileManager;
private readonly contextWindowGuard: ContextWindowGuardResult;
/** 当前会话 ID */
readonly sessionId: string;
@ -23,34 +56,87 @@ export class Agent {
this.agent = new PiAgentCore();
// 加载 Agent Profile如果指定了 profileId
let systemPrompt: string | undefined;
if (options.profileId) {
this.profile = new ProfileManager({
profileId: options.profileId,
baseDir: options.profileBaseDir,
});
const systemPrompt = this.profile.buildSystemPrompt();
systemPrompt = this.profile.buildSystemPrompt();
if (systemPrompt) {
this.agent.setSystemPrompt(systemPrompt);
}
} else if (options.systemPrompt) {
// 直接使用传入的 systemPrompt
systemPrompt = options.systemPrompt;
this.agent.setSystemPrompt(options.systemPrompt);
}
this.sessionId = options.sessionId ?? uuidv7();
this.session = new SessionManager({ sessionId: this.sessionId });
const storedMeta = this.session.getMeta();
if (!options.thinkingLevel && storedMeta?.thinkingLevel) {
this.agent.setThinkingLevel(storedMeta.thinkingLevel as any);
} else if (options.thinkingLevel) {
this.agent.setThinkingLevel(options.thinkingLevel);
}
// 解析 model用于获取 context window
const storedMeta = (() => {
// 临时创建 session 获取 meta避免循环依赖
const tempSession = new SessionManager({ sessionId: this.sessionId });
return tempSession.getMeta();
})();
const model = options.provider && options.model ? resolveModel(options) : resolveModel({
...options,
provider: storedMeta?.provider,
model: storedMeta?.model,
});
// === Context Window Guard ===
this.contextWindowGuard = checkContextWindow({
modelContextWindow: model.contextWindow,
configContextTokens: options.contextWindowTokens,
defaultTokens: DEFAULT_CONTEXT_TOKENS,
});
// 警告context window 较小
if (this.contextWindowGuard.shouldWarn) {
stderr.write(
`[Context Window Guard] WARNING: Low context window: ${this.contextWindowGuard.tokens} tokens (source: ${this.contextWindowGuard.source})\n`,
);
}
// 阻止context window 太小
if (this.contextWindowGuard.shouldBlock) {
throw new Error(
`[Context Window Guard] Context window too small: ${this.contextWindowGuard.tokens} tokens. ` +
`Minimum required: 16,000 tokens. Please use a model with a larger context window.`,
);
}
// 确定 compaction 模式
const compactionMode = options.compactionMode ?? "tokens"; // 默认使用 token 模式
// 获取 API Key用于 summary 模式)
const apiKey = compactionMode === "summary" ? resolveApiKey(model.provider) : undefined;
// 创建 SessionManager带 context window 配置)
this.session = new SessionManager({
sessionId: this.sessionId,
compactionMode,
// Token 模式参数
contextWindowTokens: this.contextWindowGuard.tokens,
systemPrompt,
reserveTokens: options.reserveTokens,
targetRatio: options.compactionTargetRatio,
minKeepMessages: options.minKeepMessages,
// Summary 模式参数
model: compactionMode === "summary" ? model : undefined,
apiKey,
customInstructions: options.summaryInstructions,
});
if (!options.thinkingLevel && storedMeta?.thinkingLevel) {
this.agent.setThinkingLevel(storedMeta.thinkingLevel as any);
} else if (options.thinkingLevel) {
this.agent.setThinkingLevel(options.thinkingLevel);
}
this.agent.setModel(model);
this.agent.setTools(resolveTools(options));
@ -63,6 +149,7 @@ export class Agent {
provider: this.agent.state.model?.provider,
model: this.agent.state.model?.id,
thinkingLevel: this.agent.state.thinkingLevel,
contextWindowTokens: this.contextWindowGuard.tokens,
});
this.agent.subscribe((event: AgentEvent) => {

View file

@ -1,15 +1,186 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import {
estimateMessagesTokens,
compactMessagesTokenAware,
estimateTokenUsage,
shouldCompact as shouldCompactTokens,
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
COMPACTION_TARGET_RATIO,
MIN_KEEP_MESSAGES,
} from "../context-window/index.js";
/** Result of a compaction in any mode. */
export type CompactionResult = {
/** Messages that were kept. */
kept: AgentMessage[];
/** Number of messages removed. */
removedCount: number;
/** Extra information available in token-aware modes. */
tokensRemoved?: number | undefined;
tokensKept?: number | undefined;
/** Summary generated in summary mode. */
summary?: string | undefined;
/** Which compaction mode produced this result. */
reason: "count" | "tokens" | "summary";
};
export function compactMessages(messages: AgentMessage[], maxMessages: number, keepLast: number) {
/**
*
*/
export function compactMessagesByCount(
messages: AgentMessage[],
maxMessages: number,
keepLast: number,
): CompactionResult | null {
if (messages.length <= maxMessages) return null;
const kept = messages.slice(-keepLast);
return {
kept,
removedCount: messages.length - kept.length,
} satisfies CompactionResult;
reason: "count",
};
}
/**
 * Token-based compaction: wraps `compactMessagesTokenAware` and tags the result
 * with reason "tokens".
 *
 * @param messages - Full message history.
 * @param availableTokens - Tokens available for messages.
 * @param options - Optional target ratio and minimum kept message count.
 * @returns The compaction result, or `null` when nothing was compacted.
 */
export function compactMessagesByTokens(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number;
    minKeepMessages?: number;
  },
): CompactionResult | null {
  const compacted = compactMessagesTokenAware(messages, availableTokens, options);
  if (!compacted) {
    return null;
  }

  const { kept, removedCount, tokensRemoved, tokensKept } = compacted;
  return { kept, removedCount, tokensRemoved, tokensKept, reason: "tokens" };
}
/** Options for synchronous compaction (count/tokens modes). */
export type SyncCompactionOptions = {
mode: "count" | "tokens";
// Count-mode parameters
maxMessages?: number | undefined;
keepLast?: number | undefined;
// Tokens-mode parameters
contextWindowTokens?: number | undefined;
systemPrompt?: string | undefined;
reserveTokens?: number | undefined;
targetRatio?: number | undefined;
minKeepMessages?: number | undefined;
};
/** Options for summary compaction (summary mode). */
export type SummaryCompactionOptions = {
mode: "summary";
// Required parameters
model: Model<any>;
apiKey: string;
// Tokens-mode parameters (reused)
contextWindowTokens?: number | undefined;
systemPrompt?: string | undefined;
reserveTokens?: number | undefined;
targetRatio?: number | undefined;
minKeepMessages?: number | undefined;
// Summary-specific parameters
customInstructions?: string | undefined;
previousSummary?: string | undefined;
signal?: AbortSignal | undefined;
maxChunkTokens?: number | undefined;
};
/** Options for any compaction mode. */
export type CompactionOptions = SyncCompactionOptions | SummaryCompactionOptions;
/**
 * Synchronous compaction for the count and tokens modes.
 *
 * In count mode this delegates to `compactMessagesByCount` (defaults: 80 max
 * messages, keep the last 60). Otherwise it estimates token usage and, when the
 * compaction trigger is reached, delegates to `compactMessagesByTokens`.
 *
 * @param messages - Full message history.
 * @param options - Mode and mode-specific parameters.
 * @returns The compaction result, or `null` when no compaction is needed.
 */
export function compactMessages(
  messages: AgentMessage[],
  options: SyncCompactionOptions,
): CompactionResult | null {
  const { mode, maxMessages, keepLast, systemPrompt, reserveTokens, targetRatio, minKeepMessages } =
    options;

  if (mode === "count") {
    return compactMessagesByCount(messages, maxMessages ?? 80, keepLast ?? 60);
  }

  // Tokens mode: measure current usage first.
  const usage = estimateTokenUsage({
    messages,
    systemPrompt,
    contextWindowTokens: options.contextWindowTokens ?? 200_000,
    reserveTokens,
  });

  // Below the trigger threshold there is nothing to do.
  if (!shouldCompactTokens(usage)) {
    return null;
  }

  return compactMessagesByTokens(messages, usage.availableTokens, {
    targetRatio: targetRatio ?? COMPACTION_TARGET_RATIO,
    minKeepMessages: minKeepMessages ?? MIN_KEEP_MESSAGES,
  });
}
/**
 * Asynchronous compaction for summary mode.
 *
 * Estimates token usage and, when the compaction trigger is reached, summarizes
 * the older messages with an LLM via `compactMessagesWithChunkedSummary`.
 *
 * @param messages - Full message history.
 * @param options - Summary-mode options; `reserveTokens` defaults to 2048 for
 *   the summarization call.
 * @returns The compaction result tagged with reason "summary", or `null` when no
 *   compaction is needed or nothing could be summarized.
 */
export async function compactMessagesAsync(
  messages: AgentMessage[],
  options: SummaryCompactionOptions,
): Promise<CompactionResult | null> {
  const usage = estimateTokenUsage({
    messages,
    systemPrompt: options.systemPrompt,
    contextWindowTokens: options.contextWindowTokens ?? 200_000,
    reserveTokens: options.reserveTokens,
  });

  // Below the trigger threshold there is nothing to do.
  if (!shouldCompactTokens(usage)) {
    return null;
  }

  // The chunked variant also handles histories too large for a single call.
  const summarized = await compactMessagesWithChunkedSummary({
    messages,
    model: options.model,
    apiKey: options.apiKey,
    availableTokens: usage.availableTokens,
    targetRatio: options.targetRatio ?? COMPACTION_TARGET_RATIO,
    minKeepMessages: options.minKeepMessages ?? MIN_KEEP_MESSAGES,
    reserveTokens: options.reserveTokens ?? 2048,
    customInstructions: options.customInstructions,
    previousSummary: options.previousSummary,
    signal: options.signal,
    maxChunkTokens: options.maxChunkTokens,
  });
  if (!summarized) {
    return null;
  }

  const { kept, removedCount, tokensRemoved, tokensKept, summary } = summarized;
  return { kept, removedCount, tokensRemoved, tokensKept, summary, reason: "summary" };
}

View file

@ -1,31 +1,125 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import type { SessionEntry, SessionMeta } from "./types.js";
import { appendEntry, readEntries, writeEntries } from "./storage.js";
import { compactMessages } from "./compaction.js";
import { compactMessages, compactMessagesAsync } from "./compaction.js";
export type SessionManagerOptions = {
sessionId: string;
baseDir?: string;
maxMessages?: number;
keepLast?: number;
baseDir?: string | undefined;
// Compaction 模式配置
/** Compaction 模式: "count" 使用消息计数, "tokens" 使用 token 感知, "summary" 使用 LLM 摘要 */
compactionMode?: "count" | "tokens" | "summary" | undefined;
// Count 模式参数
maxMessages?: number | undefined;
keepLast?: number | undefined;
// Token 模式参数
/** Context window token 数 */
contextWindowTokens?: number | undefined;
/** 系统提示词(用于计算可用 token */
systemPrompt?: string | undefined;
/** 预留给响应的 token 数 */
reserveTokens?: number | undefined;
/** Compaction 目标利用率 (0-1) */
targetRatio?: number | undefined;
/** 最小保留消息数 */
minKeepMessages?: number | undefined;
// Summary 模式参数
/** LLM Model用于生成摘要 */
model?: Model<any> | undefined;
/** API Key */
apiKey?: string | undefined;
/** 自定义摘要指令 */
customInstructions?: string | undefined;
};
export class SessionManager {
private readonly sessionId: string;
private readonly baseDir: string | undefined;
private readonly compactionMode: "count" | "tokens" | "summary";
// Count 模式
private readonly maxMessages: number;
private readonly keepLast: number;
// Token 模式
private readonly contextWindowTokens: number;
private systemPrompt: string | undefined;
private readonly reserveTokens: number;
private readonly targetRatio: number;
private readonly minKeepMessages: number;
// Summary 模式
private model: Model<any> | undefined;
private apiKey: string | undefined;
private readonly customInstructions: string | undefined;
private previousSummary: string | undefined;
private queue: Promise<void> = Promise.resolve();
private meta: SessionMeta | undefined;
/**
* Creates a session manager and loads any stored session meta.
*
* Unspecified options fall back to: compaction mode "count", 80 max messages,
* keep last 60, a 200,000-token context window, 1024 reserved tokens, target
* ratio 0.5 and a minimum of 10 kept messages.
*/
constructor(options: SessionManagerOptions) {
this.sessionId = options.sessionId;
this.baseDir = options.baseDir;
// Compaction mode
this.compactionMode = options.compactionMode ?? "count";
// Count-mode parameters
this.maxMessages = options.maxMessages ?? 80;
this.keepLast = options.keepLast ?? 60;
// Token-mode parameters
this.contextWindowTokens = options.contextWindowTokens ?? 200_000;
this.systemPrompt = options.systemPrompt;
this.reserveTokens = options.reserveTokens ?? 1024;
this.targetRatio = options.targetRatio ?? 0.5;
this.minKeepMessages = options.minKeepMessages ?? 10;
// Summary-mode parameters
this.model = options.model;
this.apiKey = options.apiKey;
this.customInstructions = options.customInstructions;
this.meta = this.loadMeta();
}
/**
* Updates the system prompt that is passed to compaction for token estimation.
*/
setSystemPrompt(systemPrompt: string | undefined) {
this.systemPrompt = systemPrompt;
}
/**
* Returns the context window size, in tokens, that this session was configured with.
*/
getContextWindowTokens(): number {
return this.contextWindowTokens;
}
/**
* Sets the LLM model used for summary-mode compaction.
*/
setModel(model: Model<any> | undefined) {
this.model = model;
}
/**
* Sets the API key used for summary-mode compaction.
*/
setApiKey(apiKey: string | undefined) {
this.apiKey = apiKey;
}
/**
* Returns the configured compaction mode.
*/
getCompactionMode(): "count" | "tokens" | "summary" {
return this.compactionMode;
}
loadEntries(): SessionEntry[] {
return readEntries(this.sessionId, { baseDir: this.baseDir });
}
@ -74,8 +168,56 @@ export class SessionManager {
}
async maybeCompact(messages: AgentMessage[]) {
const result = compactMessages(messages, this.maxMessages, this.keepLast);
let result;
if (this.compactionMode === "summary") {
// Summary 模式需要 model 和 apiKey
if (!this.model || !this.apiKey) {
// 降级到 tokens 模式
result = compactMessages(messages, {
mode: "tokens",
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
});
} else {
result = await compactMessagesAsync(messages, {
mode: "summary",
model: this.model,
apiKey: this.apiKey,
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
customInstructions: this.customInstructions,
previousSummary: this.previousSummary,
});
// 保存摘要用于下次增量更新
if (result?.summary) {
this.previousSummary = result.summary;
}
}
} else {
result = compactMessages(messages, {
mode: this.compactionMode,
// Count 模式参数
maxMessages: this.maxMessages,
keepLast: this.keepLast,
// Token 模式参数
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
});
}
if (!result) return null;
const entries: SessionEntry[] = [];
if (this.meta) {
entries.push({ type: "meta", meta: this.meta, timestamp: Date.now() });
@ -88,7 +230,13 @@ export class SessionManager {
removed: result.removedCount,
kept: result.kept.length,
timestamp: Date.now(),
// Token/Summary 模式下的额外信息
tokensRemoved: result.tokensRemoved,
tokensKept: result.tokensKept,
summary: result.summary,
reason: result.reason,
});
await this.enqueue(() =>
writeEntries(this.sessionId, entries, { baseDir: this.baseDir }),
);

View file

@ -4,9 +4,22 @@ export type SessionMeta = {
provider?: string;
model?: string;
thinkingLevel?: string;
/** Context window token 数 */
contextWindowTokens?: number;
};
export type SessionEntry =
| { type: "message"; message: AgentMessage; timestamp: number }
| { type: "meta"; meta: SessionMeta; timestamp: number }
| { type: "compaction"; removed: number; kept: number; timestamp: number };
| {
type: "compaction";
removed: number;
kept: number;
timestamp: number;
/** Token 感知 compaction 信息(可选,向后兼容) */
tokensRemoved?: number | undefined;
tokensKept?: number | undefined;
/** 摘要模式生成的摘要 */
summary?: string | undefined;
reason?: "count" | "tokens" | "summary" | undefined;
};

View file

@ -24,4 +24,25 @@ export type AgentOptions = {
cwd?: string | undefined;
sessionId?: string | undefined;
logger?: AgentLogger | undefined;
// === Context Window Guard 配置 ===
/** 手动指定 context window token 数(覆盖 model 的值) */
contextWindowTokens?: number | undefined;
/** 预留给响应生成的 token 数,默认 1024 */
reserveTokens?: number | undefined;
/**
* Compaction :
* - "count": 使
* - "tokens": 使 token
* - "summary": 使 LLM
*/
compactionMode?: "count" | "tokens" | "summary" | undefined;
/** Compaction 目标利用率 (0-1),默认 0.5 */
compactionTargetRatio?: number | undefined;
/** 最小保留消息数,默认 10 */
minKeepMessages?: number | undefined;
// === Summary Compaction 配置 ===
/** 自定义摘要生成指令 */
summaryInstructions?: string | undefined;
};