/**
 * Context Window Guard
 *
 * Validates, before an agent run, that the context window is large enough,
 * to prevent token overflow.
 */

/** Hard minimum: a context window smaller than this blocks the run. */
export const CONTEXT_WINDOW_HARD_MIN_TOKENS = 16_000;

/** Warning threshold: a context window smaller than this produces a warning. */
export const CONTEXT_WINDOW_WARN_BELOW_TOKENS = 32_000;

/** Context window assumed when neither the model nor the config supplies one. */
export const DEFAULT_CONTEXT_TOKENS = 200_000;

/**
 * Coerces a value to a positive integer.
 *
 * @returns the floored value, or `null` when the input is not a finite
 *   number or floors to zero or less.
 */
function normalizePositiveInt(value: unknown): number | null {
  if (typeof value !== "number" || !Number.isFinite(value)) return null;
  const int = Math.floor(value);
  return int > 0 ? int : null;
}

/**
 * Normalizes a guard threshold: floored and clamped to at least 1.
 * A missing or NaN threshold falls back to `fallback`, because a NaN
 * threshold would make every `<` comparison false and silently disable
 * the guard.
 */
function normalizeThreshold(value: number | undefined, fallback: number): number {
  const raw = value == null || Number.isNaN(value) ? fallback : value;
  return Math.max(1, Math.floor(raw));
}

/**
 * Resolves the effective context window size.
 *
 * Priority: model > config > default. Each candidate must be a positive
 * finite number; otherwise the next source is tried. An invalid
 * `defaultTokens` (NaN, zero, negative, non-finite) falls back to
 * {@link DEFAULT_CONTEXT_TOKENS} so the result is always a positive integer.
 *
 * NOTE(review): the `AgentOptions.contextWindowTokens` doc comment says the
 * config value overrides the model's value, but here the model wins —
 * confirm which priority is intended.
 *
 * @param params.modelContextWindow the model's `contextWindow` property
 * @param params.configContextTokens context tokens specified in configuration
 * @param params.defaultTokens fallback when neither of the above is usable
 * @returns the resolved token count together with its source
 */
export function resolveContextWindowInfo(params: {
  modelContextWindow?: number | undefined;
  configContextTokens?: number | undefined;
  defaultTokens?: number | undefined;
}): ContextWindowInfo {
  // 1. Prefer the model's own context window.
  const fromModel = normalizePositiveInt(params.modelContextWindow);
  if (fromModel !== null) {
    return { tokens: fromModel, source: "model" };
  }

  // 2. Then the configured value.
  const fromConfig = normalizePositiveInt(params.configContextTokens);
  if (fromConfig !== null) {
    return { tokens: fromConfig, source: "config" };
  }

  // 3. Finally the (validated) default.
  return {
    tokens: normalizePositiveInt(params.defaultTokens) ?? DEFAULT_CONTEXT_TOKENS,
    source: "default",
  };
}

/**
 * Evaluates whether a resolved context window should warn or block.
 *
 * A token count of 0 (or NaN, which is normalized to 0) is never flagged:
 * both flags require `tokens > 0`.
 *
 * @param params.info resolved context window info
 * @param params.warnBelowTokens warning threshold, default
 *   {@link CONTEXT_WINDOW_WARN_BELOW_TOKENS}
 * @param params.hardMinTokens blocking threshold, default
 *   {@link CONTEXT_WINDOW_HARD_MIN_TOKENS}
 * @returns `info` with a normalized `tokens` plus `shouldWarn`/`shouldBlock`
 */
export function evaluateContextWindowGuard(params: {
  info: ContextWindowInfo;
  warnBelowTokens?: number | undefined;
  hardMinTokens?: number | undefined;
}): ContextWindowGuardResult {
  const warnBelow = normalizeThreshold(params.warnBelowTokens, CONTEXT_WINDOW_WARN_BELOW_TOKENS);
  const hardMin = normalizeThreshold(params.hardMinTokens, CONTEXT_WINDOW_HARD_MIN_TOKENS);

  const rawTokens = params.info.tokens;
  // NaN would otherwise propagate into the result and into later arithmetic.
  const tokens = Number.isNaN(rawTokens) ? 0 : Math.max(0, Math.floor(rawTokens));

  return {
    ...params.info,
    tokens,
    shouldWarn: tokens > 0 && tokens < warnBelow,
    shouldBlock: tokens > 0 && tokens < hardMin,
  };
}

/**
 * Full context window guard flow: resolve and evaluate in one step.
 *
 * @returns the evaluated guard result for the resolved context window
 */
export function checkContextWindow(params: {
  modelContextWindow?: number | undefined;
  configContextTokens?: number | undefined;
  defaultTokens?: number | undefined;
  warnBelowTokens?: number | undefined;
  hardMinTokens?: number | undefined;
}): ContextWindowGuardResult {
  const info = resolveContextWindowInfo({
    modelContextWindow: params.modelContextWindow,
    configContextTokens: params.configContextTokens,
    defaultTokens: params.defaultTokens,
  });

  return evaluateContextWindowGuard({
    info,
    warnBelowTokens: params.warnBelowTokens,
    hardMinTokens: params.hardMinTokens,
  });
}
/**
 * Summary-based compaction
 *
 * Uses an LLM to summarize older history instead of simply truncating it.
 */

/** Result of a summary-based compaction. */
export type SummaryCompactionResult = {
  /** Messages that remain (the summary message first, then the kept tail) */
  kept: AgentMessage[];
  /** Number of messages removed */
  removedCount: number;
  /** Estimated tokens removed */
  tokensRemoved: number;
  /** Estimated tokens kept */
  tokensKept: number;
  /** The generated summary */
  summary: string;
  /** Compaction reason */
  reason: "summary";
};

/** Parameters for summary-based compaction. */
export type SummaryCompactionParams = {
  /** Message list */
  messages: AgentMessage[];
  /** LLM model used to generate the summary */
  model: Model;
  /** API key */
  apiKey: string;
  /** Available token budget */
  availableTokens: number;
  /** Target utilization (0-1), default 0.5 */
  targetRatio?: number | undefined;
  /** Minimum number of messages to keep, default 10 */
  minKeepMessages?: number | undefined;
  /** Tokens reserved for summary generation, default 2048 */
  reserveTokens?: number | undefined;
  /** Custom summary instructions */
  customInstructions?: string | undefined;
  /** Previous summary (for incremental updates) */
  previousSummary?: string | undefined;
  /** AbortSignal */
  signal?: AbortSignal | undefined;
};

/** Default summary prompt. */
const DEFAULT_SUMMARY_INSTRUCTIONS = `Summarize the conversation history concisely, focusing on:
- Key decisions made
- Important context and constraints
- Open questions or TODOs
- Technical details that may be needed later

Keep the summary concise but complete. Use bullet points for clarity.`;

/**
 * Splits messages into an older prefix to summarize and a recent suffix to keep.
 *
 * The kept part is always a contiguous suffix of `messages`: walking from the
 * newest message backwards, a message is kept while it fits in the target
 * budget or while fewer than `minKeepMessages` are kept; the walk stops at
 * the first message that does not qualify. Everything before that point is
 * returned in `toSummarize`, so no message is dropped or duplicated.
 *
 * @param messages full message history, oldest first
 * @param availableTokens token budget the history must fit into
 * @param options.targetRatio fraction of `availableTokens` to aim for, default 0.5
 * @param options.minKeepMessages minimum number of recent messages kept, default 10
 * @param options.estimateMessageTokens per-message token estimator; defaults
 *   to the imported `estimateTokens`
 * @returns the split, or `null` when there are too few messages, the history
 *   already fits the target, or nothing would be summarized
 */
export function splitMessagesForSummary(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number | undefined;
    minKeepMessages?: number | undefined;
    estimateMessageTokens?: ((message: AgentMessage) => number) | undefined;
  },
): { toSummarize: AgentMessage[]; toKeep: AgentMessage[] } | null {
  const targetRatio = options?.targetRatio ?? 0.5;
  const minKeep = options?.minKeepMessages ?? 10;
  const estimate = options?.estimateMessageTokens ?? estimateTokens;

  if (messages.length <= minKeep) {
    return null; // Too few messages; nothing to compact.
  }

  const totalTokens = messages.reduce((sum, message) => sum + estimate(message), 0);
  const targetTokens = Math.floor(availableTokens * targetRatio);

  // Already within the target; no compaction needed.
  if (totalTokens <= targetTokens) {
    return null;
  }

  // Index of the oldest kept message; messages[keepFrom..] is the kept suffix.
  let keepFrom = messages.length;
  let keptTokens = 0;

  for (let i = messages.length - 1; i >= 0; i--) {
    const msgTokens = estimate(messages[i]!);
    const keptCount = messages.length - keepFrom;
    const fits = keptTokens + msgTokens <= targetTokens;

    // Stop at the first message that neither fits nor is required by the
    // minimum; skipping it and continuing would leave a hole in the history.
    if (!fits && keptCount >= minKeep) {
      break;
    }

    keepFrom = i;
    keptTokens += msgTokens;
  }

  if (keepFrom === 0) {
    return null; // Everything is kept; nothing to summarize.
  }

  return {
    toSummarize: messages.slice(0, keepFrom),
    toKeep: messages.slice(keepFrom),
  };
}

/**
 * Builds the message that carries the summary in place of the removed history.
 *
 * @param summary the newly generated summary
 * @param previousSummary when given, it is included under its own heading
 *   ahead of the new summary
 */
function createSummaryMessage(summary: string, previousSummary?: string): AgentMessage {
  const content = previousSummary
    ? `## Previous Context Summary\n${previousSummary}\n\n## Recent Context Summary\n${summary}`
    : `## Conversation Summary\n${summary}`;

  return {
    role: "user",
    content: `[System Note: The following is a summary of the earlier conversation history that has been compacted to save context space.]\n\n${content}\n\n[End of Summary]`,
    timestamp: Date.now(),
  };
}

/**
 * Performs summary-based compaction.
 *
 * Generates an LLM summary of the older messages and returns it followed by
 * the most recent messages.
 *
 * @returns the compaction result, or `null` when no compaction is needed
 */
export async function compactMessagesWithSummary(
  params: SummaryCompactionParams,
): Promise<SummaryCompactionResult | null> {
  const {
    messages,
    model,
    apiKey,
    availableTokens,
    targetRatio,
    minKeepMessages,
    reserveTokens = 2048,
    customInstructions,
    previousSummary,
    signal,
  } = params;

  const split = splitMessagesForSummary(messages, availableTokens, {
    targetRatio,
    minKeepMessages,
  });

  if (!split) {
    return null;
  }

  const { toSummarize, toKeep } = split;

  // `||` (not `??`) so an empty instruction string also uses the default.
  const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
  const summary = await generateSummary(
    toSummarize,
    model,
    reserveTokens,
    apiKey,
    signal,
    instructions,
    previousSummary,
  );

  // NOTE(review): `previousSummary` is passed to generateSummary AND embedded
  // again here, while the chunked path embeds only the final summary —
  // confirm whether repeating the previous summary is intended.
  const summaryMessage = createSummaryMessage(summary, previousSummary);

  const kept = [summaryMessage, ...toKeep];

  const tokensRemoved = estimateMessagesTokens(toSummarize);
  const tokensKept = estimateMessagesTokens(kept);

  return {
    kept,
    removedCount: toSummarize.length,
    tokensRemoved,
    tokensKept,
    summary,
    reason: "summary",
  };
}

/**
 * Chunked summary generation (for very large histories).
 *
 * When the part to summarize exceeds `maxChunkTokens`, it is summarized
 * chunk by chunk; each chunk's summary is passed as the previous summary of
 * the next call, and the last chunk's summary is used as the final one.
 *
 * @param params.maxChunkTokens maximum estimated tokens per chunk, default 50000
 * @returns the compaction result, or `null` when no compaction is needed
 */
export async function compactMessagesWithChunkedSummary(
  params: SummaryCompactionParams & {
    maxChunkTokens?: number | undefined;
  },
): Promise<SummaryCompactionResult | null> {
  const {
    messages,
    model,
    apiKey,
    availableTokens,
    targetRatio,
    minKeepMessages,
    reserveTokens = 2048,
    customInstructions,
    previousSummary,
    signal,
    maxChunkTokens = 50000,
  } = params;

  const split = splitMessagesForSummary(messages, availableTokens, {
    targetRatio,
    minKeepMessages,
  });

  if (!split) {
    return null;
  }

  const { toSummarize, toKeep } = split;

  // Small enough for a single summarization call.
  const toSummarizeTokens = estimateMessagesTokens(toSummarize);
  if (toSummarizeTokens <= maxChunkTokens) {
    return compactMessagesWithSummary(params);
  }

  // Greedily pack messages into chunks of at most maxChunkTokens; a single
  // oversized message still gets a chunk of its own.
  const chunks: AgentMessage[][] = [];
  let currentChunk: AgentMessage[] = [];
  let currentTokens = 0;

  for (const msg of toSummarize) {
    const msgTokens = estimateTokens(msg);

    if (currentTokens + msgTokens > maxChunkTokens && currentChunk.length > 0) {
      chunks.push(currentChunk);
      currentChunk = [];
      currentTokens = 0;
    }

    currentChunk.push(msg);
    currentTokens += msgTokens;
  }

  if (currentChunk.length > 0) {
    chunks.push(currentChunk);
  }

  // Chunks must be summarized sequentially: each call receives the previous
  // chunk's summary as its running context.
  const instructions = customInstructions || DEFAULT_SUMMARY_INSTRUCTIONS;
  let runningContext = previousSummary;
  let finalSummary = "";

  for (const chunk of chunks) {
    finalSummary = await generateSummary(
      chunk,
      model,
      reserveTokens,
      apiKey,
      signal,
      instructions,
      runningContext,
    );
    runningContext = finalSummary;
  }

  // The last chunk's summary was generated with all earlier context.
  const summaryMessage = createSummaryMessage(finalSummary);

  const kept = [summaryMessage, ...toKeep];

  const tokensRemoved = estimateMessagesTokens(toSummarize);
  const tokensKept = estimateMessagesTokens(kept);

  return {
    kept,
    removedCount: toSummarize.length,
    tokensRemoved,
    tokensKept,
    summary: finalSummary,
    reason: "summary",
  };
}
/**
 * Computes the full token usage picture for a conversation.
 *
 * Available tokens = context window - system prompt estimate - reserve,
 * floored at 0. The utilization ratio applies ESTIMATION_SAFETY_MARGIN to the
 * message estimate and is reported as 1 when no tokens are available.
 *
 * @param params.messages conversation messages
 * @param params.systemPrompt system prompt text, if any
 * @param params.contextWindowTokens total context window size
 * @param params.reserveTokens tokens reserved for response generation, default 1024
 */
export function estimateTokenUsage(params: {
  messages: AgentMessage[];
  systemPrompt?: string | undefined;
  contextWindowTokens: number;
  reserveTokens?: number | undefined;
}): TokenEstimation {
  const messageTokens = estimateMessagesTokens(params.messages);
  const systemPromptTokens = estimateSystemPromptTokens(params.systemPrompt);
  const reserve = params.reserveTokens ?? 1024;

  const availableTokens = Math.max(
    0,
    params.contextWindowTokens - systemPromptTokens - reserve,
  );

  // Utilization with the safety margin applied to the estimate.
  const safeMessageTokens = messageTokens * ESTIMATION_SAFETY_MARGIN;
  const utilizationRatio = availableTokens > 0 ? safeMessageTokens / availableTokens : 1;

  return {
    messageTokens,
    systemPromptTokens,
    availableTokens,
    utilizationRatio,
  };
}

/**
 * Decides whether compaction is needed.
 *
 * @returns `true` when utilization has reached COMPACTION_TRIGGER_RATIO
 */
export function shouldCompact(estimation: TokenEstimation): boolean {
  return estimation.utilizationRatio >= COMPACTION_TRIGGER_RATIO;
}

/**
 * Token-aware message compaction.
 *
 * Removes the oldest messages and keeps a contiguous suffix of the history:
 * walking from the newest message backwards, a message is kept while it fits
 * in the target budget or while fewer than `minKeepMessages` are kept; the
 * walk stops at the first message that does not qualify, so a message is
 * never removed from the middle of the kept history.
 *
 * @param messages full message history, oldest first
 * @param availableTokens token budget the history must fit into
 * @param options.targetRatio fraction of `availableTokens` to aim for,
 *   default COMPACTION_TARGET_RATIO
 * @param options.minKeepMessages minimum number of recent messages kept,
 *   default MIN_KEEP_MESSAGES
 * @param options.estimateMessageTokens per-message token estimator; defaults
 *   to the imported `estimateTokens`
 * @returns the compaction result, or `null` when there are too few messages,
 *   the history already fits the target, or nothing would be removed
 */
export function compactMessagesTokenAware(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number;
    minKeepMessages?: number;
    estimateMessageTokens?: (message: AgentMessage) => number;
  },
): TokenAwareCompactionResult | null {
  const targetRatio = options?.targetRatio ?? COMPACTION_TARGET_RATIO;
  const minKeep = options?.minKeepMessages ?? MIN_KEEP_MESSAGES;
  const estimate = options?.estimateMessageTokens ?? estimateTokens;

  if (messages.length <= minKeep) {
    return null; // Too few messages; do not compact.
  }

  const currentTokens = messages.reduce((sum, message) => sum + estimate(message), 0);
  const targetTokens = Math.floor(availableTokens * targetRatio);

  // Already within the target; no compaction needed.
  if (currentTokens <= targetTokens) {
    return null;
  }

  // Index of the oldest kept message; messages[keepFrom..] is the kept suffix.
  let keepFrom = messages.length;
  let keptTokens = 0;

  for (let i = messages.length - 1; i >= 0; i--) {
    const msgTokens = estimate(messages[i]!);
    const keptCount = messages.length - keepFrom;
    const fits = keptTokens + msgTokens <= targetTokens;

    // Stop at the first message that neither fits nor is required by the
    // minimum; skipping it and continuing would leave a hole in the history.
    if (!fits && keptCount >= minKeep) {
      break;
    }

    keepFrom = i;
    keptTokens += msgTokens;
  }

  // Nothing removed means no compaction happened.
  if (keepFrom === 0) {
    return null;
  }

  return {
    kept: messages.slice(keepFrom),
    removedCount: keepFrom,
    tokensRemoved: currentTokens - keptTokens,
    tokensKept: keptTokens,
  };
}

/**
 * Checks whether a single message is oversized.
 *
 * The message's estimate (with ESTIMATION_SAFETY_MARGIN applied) is compared
 * against `contextWindowTokens * maxRatio`.
 *
 * @param message the message to check
 * @param contextWindowTokens total context window size
 * @param maxRatio fraction of the window a single message may use, default 0.5
 */
export function isMessageOversized(
  message: AgentMessage,
  contextWindowTokens: number,
  maxRatio: number = 0.5,
): boolean {
  const tokens = estimateTokens(message) * ESTIMATION_SAFETY_MARGIN;
  return tokens > contextWindowTokens * maxRatio;
}
/**
 * Provider id -> name of the environment variable holding its API key.
 * A Map (rather than a plain object) so that provider ids such as
 * "constructor" or "toString" cannot match inherited Object properties.
 */
const PROVIDER_API_KEY_ENV_VARS: ReadonlyMap<string, string> = new Map([
  ["openai", "OPENAI_API_KEY"],
  ["anthropic", "ANTHROPIC_API_KEY"],
  ["google", "GOOGLE_API_KEY"],
  ["google-genai", "GOOGLE_API_KEY"],
  ["kimi", "MOONSHOT_API_KEY"],
  ["kimi-coding", "MOONSHOT_API_KEY"],
  ["deepseek", "DEEPSEEK_API_KEY"],
  ["groq", "GROQ_API_KEY"],
  ["mistral", "MISTRAL_API_KEY"],
  ["together", "TOGETHER_API_KEY"],
]);

/**
 * Resolves the API key for a provider from the environment.
 *
 * Known providers use their mapped variable; any other provider falls back
 * to the generic form `<PROVIDER>_API_KEY`, with the provider id upper-cased
 * and `-` replaced by `_`.
 *
 * @param provider provider id, e.g. "openai" or "google-genai"
 * @returns the key, or `undefined` when the variable is not set
 */
function resolveApiKey(provider: string): string | undefined {
  const envVar = PROVIDER_API_KEY_ENV_VARS.get(provider);
  if (envVar !== undefined) {
    return process.env[envVar];
  }

  // Generic form: PROVIDER_API_KEY
  const normalizedProvider = provider.toUpperCase().replace(/-/g, "_");
  return process.env[`${normalizedProvider}_API_KEY`];
}
resolveModel(options) : resolveModel({ ...options, provider: storedMeta?.provider, model: storedMeta?.model, }); + + // === Context Window Guard === + this.contextWindowGuard = checkContextWindow({ + modelContextWindow: model.contextWindow, + configContextTokens: options.contextWindowTokens, + defaultTokens: DEFAULT_CONTEXT_TOKENS, + }); + + // 警告:context window 较小 + if (this.contextWindowGuard.shouldWarn) { + stderr.write( + `[Context Window Guard] WARNING: Low context window: ${this.contextWindowGuard.tokens} tokens (source: ${this.contextWindowGuard.source})\n`, + ); + } + + // 阻止:context window 太小 + if (this.contextWindowGuard.shouldBlock) { + throw new Error( + `[Context Window Guard] Context window too small: ${this.contextWindowGuard.tokens} tokens. ` + + `Minimum required: 16,000 tokens. Please use a model with a larger context window.`, + ); + } + + // 确定 compaction 模式 + const compactionMode = options.compactionMode ?? "tokens"; // 默认使用 token 模式 + + // 获取 API Key(用于 summary 模式) + const apiKey = compactionMode === "summary" ? resolveApiKey(model.provider) : undefined; + + // 创建 SessionManager(带 context window 配置) + this.session = new SessionManager({ + sessionId: this.sessionId, + compactionMode, + // Token 模式参数 + contextWindowTokens: this.contextWindowGuard.tokens, + systemPrompt, + reserveTokens: options.reserveTokens, + targetRatio: options.compactionTargetRatio, + minKeepMessages: options.minKeepMessages, + // Summary 模式参数 + model: compactionMode === "summary" ? 
/** Context window assumed when the caller does not supply one. */
const FALLBACK_CONTEXT_WINDOW_TOKENS = 200_000;

/** Outcome of a compaction, regardless of strategy. */
export type CompactionResult = {
  kept: AgentMessage[];
  removedCount: number;
  /** Extra information in token-aware mode */
  tokensRemoved?: number | undefined;
  tokensKept?: number | undefined;
  /** Summary generated in summary mode */
  summary?: string | undefined;
  reason: "count" | "tokens" | "summary";
};

/**
 * Simple compaction based on message count (legacy logic, kept for
 * backward compatibility).
 *
 * @param messages full message history, oldest first
 * @param maxMessages compaction triggers only when the history is longer than this
 * @param keepLast number of most recent messages to keep; values <= 0 keep none
 * @returns the compaction result, or `null` when the history is within
 *   `maxMessages` or when nothing would be removed
 */
export function compactMessagesByCount(
  messages: AgentMessage[],
  maxMessages: number,
  keepLast: number,
): CompactionResult | null {
  if (messages.length <= maxMessages) return null;

  // slice(-0) is slice(0) and would keep everything, and a negative
  // keepLast would flip the slice direction, so handle <= 0 explicitly.
  const keepCount = Math.floor(keepLast);
  const kept = keepCount > 0 ? messages.slice(-keepCount) : [];

  // Do not report a compaction that removed nothing.
  if (kept.length >= messages.length) return null;

  return {
    kept,
    removedCount: messages.length - kept.length,
    reason: "count",
  };
}

/**
 * Token-based compaction; adapts `compactMessagesTokenAware` to
 * `CompactionResult`.
 *
 * @returns the compaction result tagged with reason "tokens", or `null`
 *   when the underlying call returns `null`
 */
export function compactMessagesByTokens(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number;
    minKeepMessages?: number;
  },
): CompactionResult | null {
  const result = compactMessagesTokenAware(messages, availableTokens, options);
  if (!result) return null;

  return {
    kept: result.kept,
    removedCount: result.removedCount,
    tokensRemoved: result.tokensRemoved,
    tokensKept: result.tokensKept,
    reason: "tokens",
  };
}

/** Options for synchronous compaction (count/tokens modes). */
export type SyncCompactionOptions = {
  mode: "count" | "tokens";
  // count mode
  maxMessages?: number | undefined;
  keepLast?: number | undefined;
  // tokens mode
  contextWindowTokens?: number | undefined;
  systemPrompt?: string | undefined;
  reserveTokens?: number | undefined;
  targetRatio?: number | undefined;
  minKeepMessages?: number | undefined;
};

/** Options for summary compaction (summary mode). */
export type SummaryCompactionOptions = {
  mode: "summary";
  // required
  model: Model;
  apiKey: string;
  // shared with tokens mode
  contextWindowTokens?: number | undefined;
  systemPrompt?: string | undefined;
  reserveTokens?: number | undefined;
  targetRatio?: number | undefined;
  minKeepMessages?: number | undefined;
  // summary-specific
  customInstructions?: string | undefined;
  previousSummary?: string | undefined;
  signal?: AbortSignal | undefined;
  maxChunkTokens?: number | undefined;
};

export type CompactionOptions = SyncCompactionOptions | SummaryCompactionOptions;

/**
 * Unified synchronous compaction entry point (count/tokens modes).
 *
 * In count mode the defaults are `maxMessages` 80 and `keepLast` 60. In
 * tokens mode, compaction runs only when the estimated utilization reaches
 * the trigger ratio.
 *
 * @returns the compaction result, or `null` when no compaction is needed
 */
export function compactMessages(
  messages: AgentMessage[],
  options: SyncCompactionOptions,
): CompactionResult | null {
  if (options.mode === "count") {
    return compactMessagesByCount(
      messages,
      options.maxMessages ?? 80,
      options.keepLast ?? 60,
    );
  }

  // Token mode
  const contextWindowTokens = options.contextWindowTokens ?? FALLBACK_CONTEXT_WINDOW_TOKENS;
  const estimation = estimateTokenUsage({
    messages,
    systemPrompt: options.systemPrompt,
    contextWindowTokens,
    reserveTokens: options.reserveTokens,
  });

  if (!shouldCompactTokens(estimation)) {
    return null;
  }

  return compactMessagesByTokens(messages, estimation.availableTokens, {
    targetRatio: options.targetRatio ?? COMPACTION_TARGET_RATIO,
    minKeepMessages: options.minKeepMessages ?? MIN_KEEP_MESSAGES,
  });
}

/**
 * Summary compaction (asynchronous).
 *
 * Uses an LLM to summarize older history; runs only when the estimated
 * utilization reaches the trigger ratio.
 *
 * @returns the compaction result tagged with reason "summary", or `null`
 *   when no compaction is needed
 */
export async function compactMessagesAsync(
  messages: AgentMessage[],
  options: SummaryCompactionOptions,
): Promise<CompactionResult | null> {
  const contextWindowTokens = options.contextWindowTokens ?? FALLBACK_CONTEXT_WINDOW_TOKENS;
  const estimation = estimateTokenUsage({
    messages,
    systemPrompt: options.systemPrompt,
    contextWindowTokens,
    reserveTokens: options.reserveTokens,
  });

  if (!shouldCompactTokens(estimation)) {
    return null;
  }

  // The chunked variant also handles histories small enough for one call.
  const result = await compactMessagesWithChunkedSummary({
    messages,
    model: options.model,
    apiKey: options.apiKey,
    availableTokens: estimation.availableTokens,
    targetRatio: options.targetRatio ?? COMPACTION_TARGET_RATIO,
    minKeepMessages: options.minKeepMessages ?? MIN_KEEP_MESSAGES,
    // NOTE(review): the same `reserveTokens` option is used above as the
    // response reserve and here as the summary-generation reserve — confirm
    // that sharing one value is intended.
    reserveTokens: options.reserveTokens ?? 2048,
    customInstructions: options.customInstructions,
    previousSummary: options.previousSummary,
    signal: options.signal,
    maxChunkTokens: options.maxChunkTokens,
  });

  if (!result) {
    return null;
  }

  return {
    kept: result.kept,
    removedCount: result.removedCount,
    tokensRemoved: result.tokensRemoved,
    tokensKept: result.tokensKept,
    summary: result.summary,
    reason: "summary",
  };
}
sessionId: string; private readonly baseDir: string | undefined; + private readonly compactionMode: "count" | "tokens" | "summary"; + // Count 模式 private readonly maxMessages: number; private readonly keepLast: number; + // Token 模式 + private readonly contextWindowTokens: number; + private systemPrompt: string | undefined; + private readonly reserveTokens: number; + private readonly targetRatio: number; + private readonly minKeepMessages: number; + // Summary 模式 + private model: Model | undefined; + private apiKey: string | undefined; + private readonly customInstructions: string | undefined; + private previousSummary: string | undefined; + private queue: Promise = Promise.resolve(); private meta: SessionMeta | undefined; constructor(options: SessionManagerOptions) { this.sessionId = options.sessionId; this.baseDir = options.baseDir; + + // Compaction 模式 + this.compactionMode = options.compactionMode ?? "count"; + + // Count 模式参数 this.maxMessages = options.maxMessages ?? 80; this.keepLast = options.keepLast ?? 60; + + // Token 模式参数 + this.contextWindowTokens = options.contextWindowTokens ?? 200_000; + this.systemPrompt = options.systemPrompt; + this.reserveTokens = options.reserveTokens ?? 1024; + this.targetRatio = options.targetRatio ?? 0.5; + this.minKeepMessages = options.minKeepMessages ?? 
10; + + // Summary 模式参数 + this.model = options.model; + this.apiKey = options.apiKey; + this.customInstructions = options.customInstructions; + this.meta = this.loadMeta(); } + /** + * 更新系统提示词(用于 token 模式计算) + */ + setSystemPrompt(systemPrompt: string | undefined) { + this.systemPrompt = systemPrompt; + } + + /** + * 获取当前 context window token 数 + */ + getContextWindowTokens(): number { + return this.contextWindowTokens; + } + + /** + * 设置 LLM Model(用于 summary 模式) + */ + setModel(model: Model | undefined) { + this.model = model; + } + + /** + * 设置 API Key(用于 summary 模式) + */ + setApiKey(apiKey: string | undefined) { + this.apiKey = apiKey; + } + + /** + * 获取当前 compaction 模式 + */ + getCompactionMode(): "count" | "tokens" | "summary" { + return this.compactionMode; + } + loadEntries(): SessionEntry[] { return readEntries(this.sessionId, { baseDir: this.baseDir }); } @@ -74,8 +168,56 @@ export class SessionManager { } async maybeCompact(messages: AgentMessage[]) { - const result = compactMessages(messages, this.maxMessages, this.keepLast); + let result; + + if (this.compactionMode === "summary") { + // Summary 模式需要 model 和 apiKey + if (!this.model || !this.apiKey) { + // 降级到 tokens 模式 + result = compactMessages(messages, { + mode: "tokens", + contextWindowTokens: this.contextWindowTokens, + systemPrompt: this.systemPrompt, + reserveTokens: this.reserveTokens, + targetRatio: this.targetRatio, + minKeepMessages: this.minKeepMessages, + }); + } else { + result = await compactMessagesAsync(messages, { + mode: "summary", + model: this.model, + apiKey: this.apiKey, + contextWindowTokens: this.contextWindowTokens, + systemPrompt: this.systemPrompt, + reserveTokens: this.reserveTokens, + targetRatio: this.targetRatio, + minKeepMessages: this.minKeepMessages, + customInstructions: this.customInstructions, + previousSummary: this.previousSummary, + }); + + // 保存摘要用于下次增量更新 + if (result?.summary) { + this.previousSummary = result.summary; + } + } + } else { + result = 
compactMessages(messages, { + mode: this.compactionMode, + // Count 模式参数 + maxMessages: this.maxMessages, + keepLast: this.keepLast, + // Token 模式参数 + contextWindowTokens: this.contextWindowTokens, + systemPrompt: this.systemPrompt, + reserveTokens: this.reserveTokens, + targetRatio: this.targetRatio, + minKeepMessages: this.minKeepMessages, + }); + } + if (!result) return null; + const entries: SessionEntry[] = []; if (this.meta) { entries.push({ type: "meta", meta: this.meta, timestamp: Date.now() }); @@ -88,7 +230,13 @@ export class SessionManager { removed: result.removedCount, kept: result.kept.length, timestamp: Date.now(), + // Token/Summary 模式下的额外信息 + tokensRemoved: result.tokensRemoved, + tokensKept: result.tokensKept, + summary: result.summary, + reason: result.reason, }); + await this.enqueue(() => writeEntries(this.sessionId, entries, { baseDir: this.baseDir }), ); diff --git a/src/agent/session/types.ts b/src/agent/session/types.ts index cbc82685..c9086b74 100644 --- a/src/agent/session/types.ts +++ b/src/agent/session/types.ts @@ -4,9 +4,22 @@ export type SessionMeta = { provider?: string; model?: string; thinkingLevel?: string; + /** Context window token 数 */ + contextWindowTokens?: number; }; export type SessionEntry = | { type: "message"; message: AgentMessage; timestamp: number } | { type: "meta"; meta: SessionMeta; timestamp: number } - | { type: "compaction"; removed: number; kept: number; timestamp: number }; + | { + type: "compaction"; + removed: number; + kept: number; + timestamp: number; + /** Token 感知 compaction 信息(可选,向后兼容) */ + tokensRemoved?: number | undefined; + tokensKept?: number | undefined; + /** 摘要模式生成的摘要 */ + summary?: string | undefined; + reason?: "count" | "tokens" | "summary" | undefined; + }; diff --git a/src/agent/types.ts b/src/agent/types.ts index 9773b6ce..fc7ad9bd 100644 --- a/src/agent/types.ts +++ b/src/agent/types.ts @@ -24,4 +24,25 @@ export type AgentOptions = { cwd?: string | undefined; sessionId?: string | 
undefined; logger?: AgentLogger | undefined; + + // === Context Window Guard 配置 === + /** 手动指定 context window token 数(覆盖 model 的值) */ + contextWindowTokens?: number | undefined; + /** 预留给响应生成的 token 数,默认 1024 */ + reserveTokens?: number | undefined; + /** + * Compaction 模式: + * - "count": 使用旧的消息计数 + * - "tokens": 使用 token 感知(默认) + * - "summary": 使用 LLM 生成摘要 + */ + compactionMode?: "count" | "tokens" | "summary" | undefined; + /** Compaction 目标利用率 (0-1),默认 0.5 */ + compactionTargetRatio?: number | undefined; + /** 最小保留消息数,默认 10 */ + minKeepMessages?: number | undefined; + + // === Summary Compaction 配置 === + /** 自定义摘要生成指令 */ + summaryInstructions?: string | undefined; };