feat(agent): add context window management with token-aware compaction (#14)

* feat(agent): add context window guard to prevent token overflow

Implement token-aware context management that validates context window
size on agent initialization and provides intelligent message compaction
based on actual token usage rather than simple message count.

Key changes:
- Add context-window module with guard, token estimation, and types
- Support both "count" (legacy) and "tokens" (new default) compaction modes
- Warn when context window < 32K tokens, block when < 16K tokens
- Trigger compaction at 80% utilization, target 50% after compaction

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* feat(agent): add summary-based compaction using LLM

Implement intelligent compaction that uses LLM to generate summaries
of older messages instead of simply truncating them. This preserves
important context like key decisions, TODOs, and technical details.

Key changes:
- Add summarization.ts with compactMessagesWithSummary functions
- Support chunked summarization for very large histories
- Add "summary" compaction mode alongside "count" and "tokens"
- Auto-resolve API key from environment based on provider
- Graceful fallback to "tokens" mode if model/apiKey unavailable

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jiayuan 2026-01-30 03:46:11 +08:00 committed by GitHub
parent 3024e89071
commit 67cd46a072
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 1116 additions and 16 deletions

View file

@ -1,15 +1,186 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import {
estimateMessagesTokens,
compactMessagesTokenAware,
estimateTokenUsage,
shouldCompact as shouldCompactTokens,
compactMessagesWithSummary,
compactMessagesWithChunkedSummary,
COMPACTION_TARGET_RATIO,
MIN_KEEP_MESSAGES,
} from "../context-window/index.js";
/**
 * Outcome of a compaction pass: the messages that were retained plus
 * bookkeeping about what was removed and which strategy produced the result.
 */
export type CompactionResult = {
/** Messages retained after compaction */
kept: AgentMessage[];
/** Number of messages removed */
removedCount: number;
/** Extra information available in token-aware mode */
tokensRemoved?: number | undefined;
tokensKept?: number | undefined;
/** Summary generated in summary mode */
summary?: string | undefined;
/** Strategy that produced this result */
reason: "count" | "tokens" | "summary";
};
export function compactMessages(messages: AgentMessage[], maxMessages: number, keepLast: number) {
/**
 * Count-based compaction: once the history holds more than `maxMessages`
 * entries, keep only the trailing `keepLast` entries.
 *
 * @param messages - Message history to compact.
 * @param maxMessages - Threshold; a history of this length or shorter is left untouched.
 * @param keepLast - How many trailing messages to retain. Values below 1
 *   (including 0 and negatives) retain nothing; fractional values are floored.
 * @returns The retained messages and removal count tagged with
 *   `reason: "count"`, or `null` when the history is within `maxMessages`.
 */
export function compactMessagesByCount(
  messages: AgentMessage[],
  maxMessages: number,
  keepLast: number,
): CompactionResult | null {
  if (messages.length <= maxMessages) return null;

  // `slice(-0)` is `slice(0)` and would return the whole history, and a
  // negative `keepLast` would flip into a positive start index, so anything
  // below 1 is handled explicitly as "keep nothing".
  const kept = keepLast >= 1 ? messages.slice(-Math.floor(keepLast)) : [];

  return {
    kept,
    removedCount: messages.length - kept.length,
    reason: "count",
  };
}
/**
 * Token-aware compaction: delegates to `compactMessagesTokenAware` and tags
 * its outcome with `reason: "tokens"`.
 *
 * @param messages - Message history to compact.
 * @param availableTokens - Token budget forwarded to the token-aware routine.
 * @param options - Optional `targetRatio` / `minKeepMessages` overrides, forwarded unchanged.
 * @returns The compaction result, or `null` when the delegate returns nothing.
 */
export function compactMessagesByTokens(
  messages: AgentMessage[],
  availableTokens: number,
  options?: {
    targetRatio?: number;
    minKeepMessages?: number;
  },
): CompactionResult | null {
  const trimmed = compactMessagesTokenAware(messages, availableTokens, options);
  if (trimmed) {
    const { kept, removedCount, tokensRemoved, tokensKept } = trimmed;
    return { kept, removedCount, tokensRemoved, tokensKept, reason: "tokens" };
  }
  return null;
}
/** Synchronous compaction options (count/tokens modes) */
export type SyncCompactionOptions = {
mode: "count" | "tokens";
// Count-mode parameters
maxMessages?: number | undefined;
keepLast?: number | undefined;
// Token-mode parameters
contextWindowTokens?: number | undefined;
systemPrompt?: string | undefined;
reserveTokens?: number | undefined;
targetRatio?: number | undefined;
minKeepMessages?: number | undefined;
};
/** Summary compaction options (summary mode) */
export type SummaryCompactionOptions = {
mode: "summary";
// Required parameters
model: Model<any>;
apiKey: string;
// Token-mode parameters (reused)
contextWindowTokens?: number | undefined;
systemPrompt?: string | undefined;
reserveTokens?: number | undefined;
targetRatio?: number | undefined;
minKeepMessages?: number | undefined;
// Summary-specific parameters
customInstructions?: string | undefined;
previousSummary?: string | undefined;
signal?: AbortSignal | undefined;
maxChunkTokens?: number | undefined;
};
/** Options for any compaction mode; the `mode` field selects the variant. */
export type CompactionOptions = SyncCompactionOptions | SummaryCompactionOptions;
/**
 * Synchronous compaction entry point for the "count" and "tokens" modes.
 *
 * In "count" mode the work is handed to `compactMessagesByCount`
 * (defaults: `maxMessages` 80, `keepLast` 60). Otherwise token usage is
 * estimated first and `compactMessagesByTokens` runs only when
 * `shouldCompact` says the estimate warrants it.
 *
 * @param messages - Message history to compact.
 * @param options - Mode selector plus the parameters for that mode.
 * @returns The compaction result, or `null` when no compaction took place.
 */
export function compactMessages(
  messages: AgentMessage[],
  options: SyncCompactionOptions,
): CompactionResult | null {
  if (options.mode === "count") {
    const maxMessages = options.maxMessages ?? 80;
    const keepLast = options.keepLast ?? 60;
    return compactMessagesByCount(messages, maxMessages, keepLast);
  }

  // Every other mode is treated as token-aware compaction.
  const usage = estimateTokenUsage({
    messages,
    systemPrompt: options.systemPrompt,
    contextWindowTokens: options.contextWindowTokens ?? 200_000,
    reserveTokens: options.reserveTokens,
  });

  // Compact only when the usage estimate calls for it.
  return shouldCompactTokens(usage)
    ? compactMessagesByTokens(messages, usage.availableTokens, {
        targetRatio: options.targetRatio ?? COMPACTION_TARGET_RATIO,
        minKeepMessages: options.minKeepMessages ?? MIN_KEEP_MESSAGES,
      })
    : null;
}
/**
 * Asynchronous, summary-based compaction.
 *
 * Estimates token usage, and when `shouldCompact` says compaction is
 * warranted, delegates to `compactMessagesWithChunkedSummary` with the
 * supplied model and API key. The outcome is tagged with `reason: "summary"`.
 *
 * @param messages - Message history to compact.
 * @param options - Model, API key and tuning parameters for the summary pass.
 * @returns The compaction result including the generated summary, or `null`
 *   when compaction is not needed or the delegate returns nothing.
 */
export async function compactMessagesAsync(
  messages: AgentMessage[],
  options: SummaryCompactionOptions,
): Promise<CompactionResult | null> {
  const usage = estimateTokenUsage({
    messages,
    systemPrompt: options.systemPrompt,
    contextWindowTokens: options.contextWindowTokens ?? 200_000,
    reserveTokens: options.reserveTokens,
  });

  // Nothing to do unless the usage estimate calls for compaction.
  if (!shouldCompactTokens(usage)) return null;

  // Chunked summarization is used so very large histories can be handled.
  const summarized = await compactMessagesWithChunkedSummary({
    messages,
    model: options.model,
    apiKey: options.apiKey,
    availableTokens: usage.availableTokens,
    targetRatio: options.targetRatio ?? COMPACTION_TARGET_RATIO,
    minKeepMessages: options.minKeepMessages ?? MIN_KEEP_MESSAGES,
    reserveTokens: options.reserveTokens ?? 2048,
    customInstructions: options.customInstructions,
    previousSummary: options.previousSummary,
    signal: options.signal,
    maxChunkTokens: options.maxChunkTokens,
  });
  if (!summarized) return null;

  const { kept, removedCount, tokensRemoved, tokensKept, summary } = summarized;
  return { kept, removedCount, tokensRemoved, tokensKept, summary, reason: "summary" };
}

View file

@ -1,31 +1,125 @@
import type { AgentMessage } from "@mariozechner/pi-agent-core";
import type { Model } from "@mariozechner/pi-ai";
import type { SessionEntry, SessionMeta } from "./types.js";
import { appendEntry, readEntries, writeEntries } from "./storage.js";
import { compactMessages } from "./compaction.js";
import { compactMessages, compactMessagesAsync } from "./compaction.js";
/** Construction options for `SessionManager`. */
export type SessionManagerOptions = {
  sessionId: string;
  baseDir?: string | undefined;

  // Compaction mode configuration
  /** Compaction mode: "count" uses message counts, "tokens" is token-aware, "summary" uses LLM summaries */
  compactionMode?: "count" | "tokens" | "summary" | undefined;

  // Count-mode parameters
  maxMessages?: number | undefined;
  keepLast?: number | undefined;

  // Token-mode parameters
  /** Context window size in tokens */
  contextWindowTokens?: number | undefined;
  /** System prompt (used when computing available tokens) */
  systemPrompt?: string | undefined;
  /** Tokens reserved for the response */
  reserveTokens?: number | undefined;
  /** Target utilization after compaction (0-1) */
  targetRatio?: number | undefined;
  /** Minimum number of messages to keep */
  minKeepMessages?: number | undefined;

  // Summary-mode parameters
  /** LLM model (used to generate summaries) */
  model?: Model<any> | undefined;
  /** API key */
  apiKey?: string | undefined;
  /** Custom summarization instructions */
  customInstructions?: string | undefined;
};
export class SessionManager {
private readonly sessionId: string;
private readonly baseDir: string | undefined;
private readonly compactionMode: "count" | "tokens" | "summary";
// Count 模式
private readonly maxMessages: number;
private readonly keepLast: number;
// Token 模式
private readonly contextWindowTokens: number;
private systemPrompt: string | undefined;
private readonly reserveTokens: number;
private readonly targetRatio: number;
private readonly minKeepMessages: number;
// Summary 模式
private model: Model<any> | undefined;
private apiKey: string | undefined;
private readonly customInstructions: string | undefined;
private previousSummary: string | undefined;
private queue: Promise<void> = Promise.resolve();
private meta: SessionMeta | undefined;
/**
 * Captures the session identity and compaction configuration, substituting
 * defaults for any omitted option, then loads the session metadata.
 *
 * @param options - Session identifier plus optional storage and compaction settings.
 */
constructor(options: SessionManagerOptions) {
  const {
    sessionId,
    baseDir,
    compactionMode,
    maxMessages,
    keepLast,
    contextWindowTokens,
    systemPrompt,
    reserveTokens,
    targetRatio,
    minKeepMessages,
    model,
    apiKey,
    customInstructions,
  } = options;

  // Identity and storage location
  this.sessionId = sessionId;
  this.baseDir = baseDir;

  // Compaction strategy; message counting is used when none is specified
  this.compactionMode = compactionMode ?? "count";

  // Count-mode settings
  this.maxMessages = maxMessages ?? 80;
  this.keepLast = keepLast ?? 60;

  // Token-mode settings
  this.contextWindowTokens = contextWindowTokens ?? 200_000;
  this.systemPrompt = systemPrompt;
  this.reserveTokens = reserveTokens ?? 1024;
  this.targetRatio = targetRatio ?? 0.5;
  this.minKeepMessages = minKeepMessages ?? 10;

  // Summary-mode settings
  this.model = model;
  this.apiKey = apiKey;
  this.customInstructions = customInstructions;

  // Metadata is loaded last, once the fields above are in place
  this.meta = this.loadMeta();
}
/**
 * Replaces the stored system prompt. `maybeCompact` forwards this value to
 * the compaction routines, where it feeds the token usage estimate.
 */
setSystemPrompt(systemPrompt: string | undefined) {
this.systemPrompt = systemPrompt;
}
/**
 * Returns the context window size, in tokens, configured for this session.
 */
getContextWindowTokens(): number {
return this.contextWindowTokens;
}
/**
 * Sets the LLM model used by summary-mode compaction. When the model is
 * unset, `maybeCompact` falls back to token-mode compaction.
 */
setModel(model: Model<any> | undefined) {
this.model = model;
}
/**
 * Sets the API key used by summary-mode compaction. When the key is unset,
 * `maybeCompact` falls back to token-mode compaction.
 */
setApiKey(apiKey: string | undefined) {
this.apiKey = apiKey;
}
/**
 * Returns the compaction mode this manager was configured with.
 */
getCompactionMode(): "count" | "tokens" | "summary" {
return this.compactionMode;
}
/**
 * Reads this session's entries through `readEntries`, using the configured
 * `baseDir`.
 */
loadEntries(): SessionEntry[] {
return readEntries(this.sessionId, { baseDir: this.baseDir });
}
@ -74,8 +168,56 @@ export class SessionManager {
}
async maybeCompact(messages: AgentMessage[]) {
const result = compactMessages(messages, this.maxMessages, this.keepLast);
let result;
if (this.compactionMode === "summary") {
// Summary 模式需要 model 和 apiKey
if (!this.model || !this.apiKey) {
// 降级到 tokens 模式
result = compactMessages(messages, {
mode: "tokens",
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
});
} else {
result = await compactMessagesAsync(messages, {
mode: "summary",
model: this.model,
apiKey: this.apiKey,
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
customInstructions: this.customInstructions,
previousSummary: this.previousSummary,
});
// 保存摘要用于下次增量更新
if (result?.summary) {
this.previousSummary = result.summary;
}
}
} else {
result = compactMessages(messages, {
mode: this.compactionMode,
// Count 模式参数
maxMessages: this.maxMessages,
keepLast: this.keepLast,
// Token 模式参数
contextWindowTokens: this.contextWindowTokens,
systemPrompt: this.systemPrompt,
reserveTokens: this.reserveTokens,
targetRatio: this.targetRatio,
minKeepMessages: this.minKeepMessages,
});
}
if (!result) return null;
const entries: SessionEntry[] = [];
if (this.meta) {
entries.push({ type: "meta", meta: this.meta, timestamp: Date.now() });
@ -88,7 +230,13 @@ export class SessionManager {
removed: result.removedCount,
kept: result.kept.length,
timestamp: Date.now(),
// Token/Summary 模式下的额外信息
tokensRemoved: result.tokensRemoved,
tokensKept: result.tokensKept,
summary: result.summary,
reason: result.reason,
});
await this.enqueue(() =>
writeEntries(this.sessionId, entries, { baseDir: this.baseDir }),
);

View file

@ -4,9 +4,22 @@ export type SessionMeta = {
provider?: string;
model?: string;
thinkingLevel?: string;
/** Context window token 数 */
contextWindowTokens?: number;
};
/** A single session record, discriminated by its `type` field. */
export type SessionEntry =
  | { type: "message"; message: AgentMessage; timestamp: number }
  | { type: "meta"; meta: SessionMeta; timestamp: number }
  | {
      type: "compaction";
      removed: number;
      kept: number;
      timestamp: number;
      /** Token-aware compaction details (optional, for backward compatibility) */
      tokensRemoved?: number | undefined;
      tokensKept?: number | undefined;
      /** Summary produced in summary mode */
      summary?: string | undefined;
      reason?: "count" | "tokens" | "summary" | undefined;
    };