Merge remote-tracking branch 'origin/main' into feat/dashboard

This commit is contained in:
Naiyuan Qing 2026-02-12 17:38:49 +08:00
commit fd098c04eb
22 changed files with 1329 additions and 130 deletions

View file

@ -10,6 +10,23 @@ import { isHeartbeatAckEvent } from "../hub/heartbeat-filter.js";
const devNull = { write: () => true } as unknown as NodeJS.WritableStream;
const WRITEINTERNAL_RETRY_DELAY_MS = 5000;
/**
 * Decide whether a `runInternal` error message describes a transient failure
 * that is worth retrying.
 *
 * The comparison is case-insensitive. A message counts as transient when it
 * contains one of the known network/abort/overload markers, or one of the
 * HTTP status codes 429, 502, 503 or 504 as a standalone number.
 *
 * @param errorMsg Error text reported by the run.
 * @returns `true` when the text looks transient, `false` otherwise.
 */
function isTransientRunError(errorMsg: string): boolean {
  const text = errorMsg.toLowerCase();

  // Substrings that indicate a dropped connection, an abort, a timeout or an overloaded backend.
  const transientMarkers = [
    "terminated",
    "aborted",
    "econnreset",
    "etimedout",
    "socket hang up",
    "fetch failed",
    "timeout",
    "timed out",
    "overloaded",
  ];

  // Word boundaries keep longer numbers such as "5031" from matching.
  const retryableStatus = /\b(429|502|503|504)\b/;

  return transientMarkers.some((marker) => text.includes(marker)) || retryableStatus.test(text);
}
/** Discriminated union of legacy Message, raw AgentEvent, and MulticaEvent */
export type ChannelItem = Message | AgentEvent | MulticaEvent;
@ -122,30 +139,54 @@ export class AsyncAgent {
.then(async () => {
if (this._closed) return;
const prevForward = this.forwardInternalAssistant;
this.forwardInternalAssistant = forwardAssistant;
try {
const result = await this.agent.runInternal(content);
await this.agent.flushSession();
if (result.error) {
// Internal run errors are for diagnostics only; do not leak to user stream.
console.error(`[AsyncAgent] Internal run error: ${result.error}`);
}
// Stop forwarding BEFORE persist to avoid double-emitting the same
// assistant message (once from runInternal streaming, once from appendMessage).
this.forwardInternalAssistant = prevForward;
// Persist the LLM summary so it remains in parent context for future turns
if (persistResponse && result.text?.trim() && !isSilentReplyText(result.text)) {
this.agent.persistAssistantSummary(result.text.trim());
for (let attempt = 1; attempt <= 2; attempt++) {
this.forwardInternalAssistant = forwardAssistant;
try {
const result = await this.agent.runInternal(content);
await this.agent.flushSession();
if (result.error) {
if (attempt === 1 && isTransientRunError(result.error)) {
console.warn(
`[AsyncAgent] Internal run transient error: ${result.error}. Retrying in ${WRITEINTERNAL_RETRY_DELAY_MS}ms...`,
);
this.forwardInternalAssistant = prevForward;
await new Promise((r) => setTimeout(r, WRITEINTERNAL_RETRY_DELAY_MS));
continue;
}
// Final attempt or non-transient: log and give up
console.error(`[AsyncAgent] Internal run error: ${result.error}`);
this.forwardInternalAssistant = prevForward;
return;
}
// Success — stop forwarding BEFORE persist to avoid double-emitting
this.forwardInternalAssistant = prevForward;
if (persistResponse && result.text?.trim() && !isSilentReplyText(result.text)) {
this.agent.persistAssistantSummary(result.text.trim());
await this.agent.flushSession();
}
return;
} catch (err) {
const message = err instanceof Error ? err.message : String(err);
if (attempt === 1 && isTransientRunError(message)) {
console.warn(
`[AsyncAgent] Internal run exception: ${message}. Retrying in ${WRITEINTERNAL_RETRY_DELAY_MS}ms...`,
);
this.forwardInternalAssistant = prevForward;
await new Promise((r) => setTimeout(r, WRITEINTERNAL_RETRY_DELAY_MS));
continue;
}
console.error(`[AsyncAgent] Internal run failed: ${message}`);
this.forwardInternalAssistant = prevForward;
return;
}
} finally {
this.forwardInternalAssistant = prevForward;
}
})
.catch((err) => {
const message = err instanceof Error ? err.message : String(err);
// Internal run exceptions are for diagnostics only; do not leak to user stream.
console.error(`[AsyncAgent] Internal run failed: ${message}`);
console.error(`[AsyncAgent] Internal run failed (outer): ${message}`);
});
}

View file

@ -37,7 +37,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => ({
describe("token-estimation", () => {
describe("constants", () => {
it("should have correct safety margin", () => {
expect(ESTIMATION_SAFETY_MARGIN).toBe(1.2);
expect(ESTIMATION_SAFETY_MARGIN).toBe(1.5);
});
it("should have correct compaction trigger ratio", () => {
@ -63,20 +63,20 @@ describe("token-estimation", () => {
});
it("should estimate tokens based on character count", () => {
// ~3 chars per token
expect(estimateSystemPromptTokens("abc")).toBe(1);
expect(estimateSystemPromptTokens("abcdef")).toBe(2);
expect(estimateSystemPromptTokens("abcdefghi")).toBe(3);
// ~2 chars per token (conservative for CJK/mixed content)
expect(estimateSystemPromptTokens("ab")).toBe(1);
expect(estimateSystemPromptTokens("abcd")).toBe(2);
expect(estimateSystemPromptTokens("abcdef")).toBe(3);
});
it("should ceil the result", () => {
// 4 chars / 3 = 1.33, should ceil to 2
expect(estimateSystemPromptTokens("abcd")).toBe(2);
// 3 chars / 2 = 1.5, should ceil to 2
expect(estimateSystemPromptTokens("abc")).toBe(2);
});
it("should handle long prompts", () => {
const longPrompt = "a".repeat(3000);
expect(estimateSystemPromptTokens(longPrompt)).toBe(1000);
expect(estimateSystemPromptTokens(longPrompt)).toBe(1500);
});
});
@ -140,7 +140,7 @@ describe("token-estimation", () => {
reserveTokens: 0,
});
// Utilization = (tokens * 1.2) / available
// Utilization = (tokens * 1.5) / available
expect(result.utilizationRatio).toBeGreaterThan(0);
});
});
@ -292,26 +292,26 @@ describe("token-estimation", () => {
content: "x".repeat(400), // ~100 tokens
} as AgentMessage;
// With safety margin 1.2, 100 * 1.2 = 120 tokens
// 120 > 1000 * 0.1 = 100, so oversized
// With safety margin 1.5, 100 * 1.5 = 150 tokens
// 150 > 1000 * 0.1 = 100, so oversized
expect(isMessageOversized(message, 1000, 0.1)).toBe(true);
// 120 < 1000 * 0.2 = 200, so not oversized
// 150 < 1000 * 0.2 = 200, so not oversized
expect(isMessageOversized(message, 1000, 0.2)).toBe(false);
});
it("should apply safety margin to token count", () => {
const message = {
role: "user",
content: "x".repeat(400), // ~100 tokens, with margin ~120
content: "x".repeat(400), // ~100 tokens, with margin ~150
} as AgentMessage;
// Without margin: 100 < 250 (50% of 500)
// With margin: 120 < 250, still ok
// With margin: 150 < 250, still ok
expect(isMessageOversized(message, 500, 0.5)).toBe(false);
// Without margin: 100 < 100 would be false
// With margin: 120 > 100, should be true
// With margin: 150 > 100, should be true
expect(isMessageOversized(message, 200, 0.5)).toBe(true);
});
});

View file

@ -9,7 +9,7 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent";
import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js";
/** Safety margin coefficient to compensate for estimation inaccuracy */
export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer
export const ESTIMATION_SAFETY_MARGIN = 1.5; // 50% buffer (covers CJK and mixed content)
/** Utilization threshold for triggering compaction */
export const COMPACTION_TRIGGER_RATIO = 0.8; // 80%
@ -32,10 +32,10 @@ export function estimateMessagesTokens(messages: AgentMessage[]): number {
*/
export function estimateSystemPromptTokens(systemPrompt: string | undefined): number {
if (!systemPrompt) return 0;
// Simple estimation: ~4 chars = 1 token (for English/code mixed text)
// Chinese ~2 chars = 1 token
// Average value of 3
return Math.ceil(systemPrompt.length / 3);
// Conservative estimation: ~2 chars = 1 token
// English/code averages ~4 chars/token but CJK averages ~1-2 chars/token.
// Using /2 as a safe default to prevent underestimation on mixed content.
return Math.ceil(systemPrompt.length / 2);
}
/**

View file

@ -0,0 +1,21 @@
/**
* Error classification utilities for agent error handling.
*/
/**
 * Tell whether an error is a context overflow ("prompt too long") failure
 * reported by an LLM provider.
 *
 * Such errors mean the request exceeded the model's context window; callers
 * should react with auto-compaction rather than auth profile rotation.
 *
 * The message of an `Error` (or the string form of any other value) is
 * matched case-insensitively against a set of known provider phrases, plus
 * the combination of "413" and "too large".
 *
 * @param error Any thrown value.
 * @returns `true` when the text matches a known overflow pattern.
 */
export function isContextOverflowError(error: unknown): boolean {
  const rawText = error instanceof Error ? error.message : String(error);
  const text = rawText.toLowerCase();

  const overflowPhrases = [
    "prompt is too long",
    "context length exceeded",
    "maximum context length",
    "request_too_large",
    "request size exceeds",
  ];
  if (overflowPhrases.some((phrase) => text.includes(phrase))) {
    return true;
  }

  // HTTP 413 only counts when the payload-size wording is present as well.
  return text.includes("413") && text.includes("too large");
}

View file

@ -22,7 +22,14 @@ import {
checkContextWindow,
DEFAULT_CONTEXT_TOKENS,
type ContextWindowGuardResult,
estimateTokenUsage,
COMPACTION_TRIGGER_RATIO,
compactMessagesTokenAware,
MIN_KEEP_MESSAGES,
} from "./context-window/index.js";
import {
pruneToolResults,
} from "./context-window/tool-result-pruning.js";
import { mergeToolsConfig, type ToolsConfig } from "./tools/policy.js";
import {
loadAuthProfileStore,
@ -42,6 +49,7 @@ import {
sanitizeToolCallInputs,
sanitizeToolUseResultPairing,
} from "./session/session-transcript-repair.js";
import { isContextOverflowError } from "./errors.js";
// ============================================================
// Error classification for auth profile rotation
@ -89,11 +97,15 @@ export class Agent {
private readonly stderr: NodeJS.WritableStream;
private initialized = false;
// Context window settings (for pre-flight compaction)
private readonly reserveTokens: number;
// Internal run state
private _internalRun = false;
private _isRunning = false;
private _aborted = false;
private _runMutex: Promise<void> = Promise.resolve();
private _compactionPromise: Promise<void> = Promise.resolve();
private currentUserDisplayPrompt: string | undefined;
// MulticaEvent subscribers (parallel to PiAgentCore's subscriber list)
@ -188,8 +200,10 @@ export class Agent {
return this.currentApiKey;
},
transformContext: async (messages) => {
const sanitizedInputs = sanitizeToolCallInputs(messages);
return sanitizeToolUseResultPairing(sanitizedInputs);
let result = sanitizeToolCallInputs(messages);
result = sanitizeToolUseResultPairing(result);
result = this.preflightCompact(result);
return result;
},
});
@ -260,6 +274,9 @@ export class Agent {
? resolveApiKey(this.resolvedProvider, options.apiKey)
: undefined;
// Store reserveTokens for pre-flight compaction
this.reserveTokens = options.reserveTokens ?? 1024;
// Create the SessionManager (with context window configuration)
this.session = new SessionManager({
sessionId: this.sessionId,
@ -425,6 +442,8 @@ export class Agent {
prompt: string,
options?: { displayPrompt?: string },
): Promise<AgentRunResult> {
// Wait for any in-flight compaction from the previous run
await this._compactionPromise;
await this.ensureInitialized();
this.refreshAuthState();
this.output.state.lastAssistantText = "";
@ -444,6 +463,9 @@ export class Agent {
const canRotate = !this.pinnedProfile && this.profileCandidates.length > 1;
let lastError: unknown;
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 2;
let overflowAttempts = 0;
// Loop to exhaust all candidate profiles on rotatable errors
while (true) {
try {
@ -452,6 +474,34 @@ export class Agent {
} catch (error) {
lastError = error;
// Context overflow recovery: auto-compact and retry before trying auth rotation
if (isContextOverflowError(error) && overflowAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
overflowAttempts++;
this.stderr.write(
`[context-overflow] Overflow detected (attempt ${overflowAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}), compacting...\n`,
);
const messages = this.agent.state.messages.slice();
const result = await this.session.maybeCompact(messages);
if (result?.kept) {
this.agent.replaceMessages(result.kept);
this.output.state.lastAssistantText = "";
continue; // retry with compacted messages
}
// Forced fallback: estimation may diverge from reality (the LLM
// already told us the context is too large), so drop the oldest
// half of messages even when maybeCompact thinks no compaction is needed.
if (messages.length > MIN_KEEP_MESSAGES) {
const keepCount = Math.max(MIN_KEEP_MESSAGES, Math.floor(messages.length / 2));
const forcedKept = messages.slice(-keepCount);
this.stderr.write(
`[context-overflow] Forced compaction: ${messages.length}${forcedKept.length} messages\n`,
);
this.agent.replaceMessages(forcedKept);
this.output.state.lastAssistantText = "";
continue;
}
}
const reason = classifyError(error);
if (this.currentProfileId && isRotatableError(reason)) {
markAuthProfileFailure(this.currentProfileId, reason);
@ -615,35 +665,88 @@ export class Agent {
// Skip compaction during internal runs — internal messages will be
// rolled back from memory afterwards, so compacting now would be incorrect.
if (message.role === "assistant" && !this._internalRun) {
void this.maybeCompact();
this._compactionPromise = this.maybeCompact().catch((err) => {
console.error("[Agent] Compaction failed:", err);
});
}
}
}
/**
 * Pre-flight context compaction: invoked from `transformContext`, before the
 * messages are handed to the LLM.
 *
 * Operates only on the array it receives and returns the (possibly shorter)
 * result; this method itself does not touch the session. Steps:
 *  1. Estimate utilization. Below COMPACTION_TRIGGER_RATIO the input is
 *     returned untouched (fast path).
 *  2. Prune tool results via `pruneToolResults`.
 *  3. Re-estimate. If utilization is still at or above the threshold, drop
 *     messages via `compactMessagesTokenAware`.
 *
 * A line is written to stderr only when the number of messages actually shrank.
 *
 * @param messages Messages about to be sent.
 * @returns The input array when nothing had to change, otherwise the compacted messages.
 */
private preflightCompact(messages: AgentMessage[]): AgentMessage[] {
  // Both estimates use the same system prompt / window / reserve settings.
  const estimate = (candidate: AgentMessage[]) =>
    estimateTokenUsage({
      messages: candidate,
      systemPrompt: this.agent.state.systemPrompt,
      contextWindowTokens: this.contextWindowGuard.tokens,
      reserveTokens: this.reserveTokens,
    });

  if (estimate(messages).utilizationRatio < COMPACTION_TRIGGER_RATIO) {
    return messages; // fast path
  }

  const originalCount = messages.length;
  let result = messages;

  // Phase 1: Prune tool results (soft trim + hard clear)
  const pruneResult = pruneToolResults({
    messages: result,
    contextWindowTokens: this.contextWindowGuard.tokens,
  });
  if (pruneResult.changed) {
    result = pruneResult.messages;
  }

  // Phase 2: Drop oldest messages if the pruned set is still over threshold
  const afterPrune = estimate(result);
  if (afterPrune.utilizationRatio >= COMPACTION_TRIGGER_RATIO) {
    const compacted = compactMessagesTokenAware(result, afterPrune.availableTokens);
    if (compacted) {
      result = compacted.kept;
    }
  }

  if (result.length < originalCount) {
    const saved = originalCount - result.length;
    // Separator between the two counts keeps them from fusing into a single number.
    this.stderr.write(
      `[pre-flight compaction] pruned ${saved} messages (${originalCount} -> ${result.length})\n`,
    );
  }

  return result;
}
private async maybeCompact() {
const messages = this.agent.state.messages.slice();
if (!this.session.needsCompaction(messages)) return;
try {
const result = await this.session.maybeCompact(messages);
if (!result) return;
const result = await this.session.maybeCompact(messages);
if (!result) return;
this.emitMulticaEvent({ type: "compaction_start" });
if (result?.kept) {
this.agent.replaceMessages(result.kept);
}
const endEvent: CompactionEndEvent = {
type: "compaction_end",
removed: result?.removedCount ?? 0,
kept: result?.kept.length ?? messages.length,
tokensRemoved: result?.tokensRemoved,
tokensKept: result?.tokensKept,
reason: result?.reason ?? "tokens",
};
this.emitMulticaEvent(endEvent);
} catch (err) {
throw err;
this.emitMulticaEvent({ type: "compaction_start" });
if (result.kept) {
this.agent.replaceMessages(result.kept);
}
const endEvent: CompactionEndEvent = {
type: "compaction_end",
removed: result.removedCount ?? 0,
kept: result.kept.length ?? messages.length,
tokensRemoved: result.tokensRemoved,
tokensKept: result.tokensKept,
reason: result.reason ?? "tokens",
};
this.emitMulticaEvent(endEvent);
}
/**

View file

@ -44,7 +44,7 @@ vi.mock("../context-window/index.js", async () => {
const systemPromptTokens = params.systemPrompt ? 100 : 0;
const reserve = params.reserveTokens ?? 1024;
const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve);
const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.2) / availableTokens : 1;
const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1;
return {
messageTokens,
@ -234,7 +234,7 @@ describe("compaction", () => {
// 100 * 10 = 1000 message tokens
// System: 100 tokens, Reserve: 1024
// Available: 2000 - 100 - 1024 = 876
// Utilization: (1000 * 1.2) / 876 = 1.37 > 0.8
// Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8
const result = compactMessages(messages, {
mode: "tokens",
contextWindowTokens: 2000,
@ -249,7 +249,7 @@ describe("compaction", () => {
const messages = createMessages(5);
// 5 * 10 = 50 message tokens
// Available: 10000 - 100 - 1024 = 8876
// Utilization: (50 * 1.2) / 8876 = 0.007 < 0.8
// Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8
const result = compactMessages(messages, {
mode: "tokens",
contextWindowTokens: 10000,

View file

@ -188,7 +188,7 @@ describe("formatCoalescedAnnouncementMessage", () => {
const msg = formatCoalescedAnnouncementMessage(records);
expect(msg).toContain("All 2 background tasks have completed");
expect(msg).toContain("All 2 background task(s) have completed");
expect(msg).toContain('Task 1: "Task A"');
expect(msg).toContain("Found issue A");
expect(msg).toContain('Task 2: "Task B"');
@ -251,4 +251,44 @@ describe("formatCoalescedAnnouncementMessage", () => {
expect(msg).toContain("上海多云9°C");
expect(msg).toContain("MUST include findings from every task item above");
});
it("includes continuation prompt when next is provided", () => {
const records = [
makeRecord({ runId: "run-1", label: "AAPL data", findings: "AAPL revenue: $100B" }),
makeRecord({ runId: "run-2", label: "MSFT data", findings: "MSFT revenue: $200B" }),
];
const msg = formatCoalescedAnnouncementMessage(records, "Summarize all data and write a PDF investment report");
expect(msg).toContain("CONTINUATION TASK");
expect(msg).toContain("Summarize all data and write a PDF investment report");
expect(msg).toContain("AAPL revenue: $100B");
expect(msg).toContain("MSFT revenue: $200B");
// Should NOT contain the default summarize instruction
expect(msg).not.toContain("Summarize these results naturally for the user");
});
it("uses continuation prompt even for single record when next is provided", () => {
const records = [
makeRecord({ runId: "run-1", label: "Data collection", findings: "All data collected" }),
];
const msg = formatCoalescedAnnouncementMessage(records, "Generate the final report");
expect(msg).toContain("CONTINUATION TASK");
expect(msg).toContain("Generate the final report");
expect(msg).toContain("All data collected");
});
it("uses default summarize instruction when next is not provided", () => {
const records = [
makeRecord({ runId: "run-1" }),
makeRecord({ runId: "run-2" }),
];
const msg = formatCoalescedAnnouncementMessage(records);
expect(msg).not.toContain("CONTINUATION TASK");
expect(msg).toContain("Summarize these results naturally for the user");
});
});

View file

@ -193,12 +193,17 @@ export function formatAnnouncementMessage(params: FormatAnnouncementParams): str
/**
* Format a coalesced announcement message from multiple completed subagent runs.
* When only one record is provided, delegates to formatAnnouncementMessage.
*
* @param next Optional continuation prompt from a SubagentGroup. When present,
* the parent agent is instructed to execute the continuation using the combined
* findings, rather than just summarizing.
*/
export function formatCoalescedAnnouncementMessage(
records: SubagentRunRecord[],
next?: string,
): string {
// Single record: delegate to existing format for backward-compatible behavior
if (records.length === 1) {
// Single record without continuation: delegate to existing format
if (records.length === 1 && !next) {
const r = records[0]!;
return formatAnnouncementMessage({
runId: r.runId,
@ -214,10 +219,9 @@ export function formatCoalescedAnnouncementMessage(
});
}
// Multiple records: build combined message.
// Include a strict raw-findings section so parent can reliably cover every task result.
// Multiple records (or single with continuation): build combined message.
const parts: string[] = [
`All ${records.length} background tasks have completed. Here are the combined results:`,
`All ${records.length} background task(s) have completed. Here are the combined results:`,
"",
];
@ -262,14 +266,30 @@ export function formatCoalescedAnnouncementMessage(
);
}
parts.push(
"",
"Summarize these results naturally for the user.",
"You MUST include findings from every task item above, without omission.",
"Keep it concise, but preserve concrete findings from each task.",
"Do not mention technical details like session IDs or that these were background tasks.",
"You can respond with NO_REPLY if no announcement is needed.",
);
// Continuation vs. summarization
if (next) {
parts.push(
"",
"---",
"",
"CONTINUATION TASK: The user's original request requires further work using the findings above.",
"Execute the following task now, using ALL the collected data:",
"",
next,
"",
"Use the raw findings above as your data source. Call tools as needed to complete this task.",
"Do not mention technical details like session IDs or that these were background tasks.",
);
} else {
parts.push(
"",
"Summarize these results naturally for the user.",
"You MUST include findings from every task item above, without omission.",
"Keep it concise, but preserve concrete findings from each task.",
"Do not mention technical details like session IDs or that these were background tasks.",
"You can respond with NO_REPLY if no announcement is needed.",
);
}
return parts.join("\n");
}
@ -289,8 +309,9 @@ export function formatCoalescedAnnouncementMessage(
export function runCoalescedAnnounceFlow(
requesterSessionId: string,
records: SubagentRunRecord[],
next?: string,
): boolean {
const message = formatCoalescedAnnouncementMessage(records);
const message = formatCoalescedAnnouncementMessage(records, next);
try {
const hub = getHub();

View file

@ -12,6 +12,7 @@ const rmSyncMock = vi.fn();
vi.mock("./registry-store.js", () => ({
loadSubagentRuns: loadSubagentRunsMock,
loadSubagentGroups: vi.fn(() => new Map()),
saveSubagentRuns: saveSubagentRunsMock,
}));

View file

@ -7,7 +7,7 @@
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
import { join } from "node:path";
import { DATA_DIR } from "@multica/utils";
import type { SubagentRunRecord } from "./types.js";
import type { SubagentRunRecord, SubagentGroup } from "./types.js";
const SUBAGENTS_DIR = join(DATA_DIR, "subagents");
const RUNS_FILE = join(SUBAGENTS_DIR, "runs.json");
@ -15,6 +15,7 @@ const RUNS_FILE = join(SUBAGENTS_DIR, "runs.json");
interface SubagentRunsStore {
version: 1;
runs: Record<string, SubagentRunRecord>;
groups?: Record<string, SubagentGroup> | undefined;
}
function ensureDir(): void {
@ -48,13 +49,31 @@ export function loadSubagentRuns(): Map<string, SubagentRunRecord> {
}
}
/** Save all subagent runs to disk */
export function saveSubagentRuns(runs: Map<string, SubagentRunRecord>): void {
/**
 * Load all persisted subagent groups from the runs file.
 *
 * An empty map is returned when the file does not exist, cannot be read or
 * parsed, has a version other than 1, or carries no `groups` entry.
 */
export function loadSubagentGroups(): Map<string, SubagentGroup> {
  let loaded = new Map<string, SubagentGroup>();

  if (existsSync(RUNS_FILE)) {
    try {
      const parsed = JSON.parse(readFileSync(RUNS_FILE, "utf-8")) as SubagentRunsStore;
      if (parsed.version === 1 && parsed.groups) {
        loaded = new Map(Object.entries(parsed.groups));
      }
    } catch {
      // Unreadable or malformed store: fall back to an empty map.
      loaded = new Map();
    }
  }

  return loaded;
}
/** Save all subagent runs and groups to disk */
export function saveSubagentRuns(
runs: Map<string, SubagentRunRecord>,
groups?: Map<string, SubagentGroup>,
): void {
ensureDir();
const store: SubagentRunsStore = {
version: 1,
runs: Object.fromEntries(runs),
groups: groups && groups.size > 0 ? Object.fromEntries(groups) : undefined,
};
writeFileSync(RUNS_FILE, JSON.stringify(store, null, 2), "utf-8");

View file

@ -6,11 +6,12 @@
*/
import { getHub, isHubInitialized } from "../../hub/hub-singleton.js";
import { loadSubagentRuns, saveSubagentRuns } from "./registry-store.js";
import { loadSubagentRuns, saveSubagentRuns, loadSubagentGroups } from "./registry-store.js";
import { readLatestAssistantReply, runCoalescedAnnounceFlow } from "./announce.js";
import type {
RegisterSubagentRunParams,
SubagentRunRecord,
SubagentGroup,
} from "./types.js";
import { resolveSessionDir } from "../session/storage.js";
import { rmSync } from "node:fs";
@ -28,6 +29,7 @@ const SWEEP_INTERVAL_MS = 60 * 1000;
// ============================================================================
const subagentRuns = new Map<string, SubagentRunRecord>();
const subagentGroups = new Map<string, SubagentGroup>();
let sweepTimer: ReturnType<typeof setInterval> | undefined;
const resumedRequesters = new Set<string>();
@ -50,6 +52,12 @@ export function initSubagentRegistry(): void {
}
}
// Restore groups
const persistedGroups = loadSubagentGroups();
for (const [groupId, group] of persistedGroups) {
subagentGroups.set(groupId, group);
}
// Process incomplete runs
const affectedRequesters = new Set<string>();
@ -91,6 +99,45 @@ export function initSubagentRegistry(): void {
}
}
// ============================================================================
// Group management
// ============================================================================
/**
 * Create a subagent group, store it in the in-memory group map and persist
 * the registry.
 *
 * @param params Group identity plus optional label and `next` continuation prompt.
 * @returns The newly created group record (timestamped with `Date.now()`).
 */
export function createSubagentGroup(params: {
  groupId: string;
  requesterSessionId: string;
  label?: string;
  next?: string;
}): SubagentGroup {
  const { groupId, requesterSessionId, label, next } = params;
  const created: SubagentGroup = {
    groupId,
    requesterSessionId,
    label,
    next,
    createdAt: Date.now(),
  };

  subagentGroups.set(groupId, created);
  persist();

  return created;
}
/**
 * Look up a group in the in-memory group map.
 *
 * @param groupId Identifier of the group.
 * @returns The group record, or `undefined` when no group has that ID.
 */
export function getSubagentGroup(groupId: string): SubagentGroup | undefined {
  const found = subagentGroups.get(groupId);
  return found;
}
/**
 * Collect every tracked run whose `groupId` equals the given ID.
 *
 * @param groupId Identifier of the group.
 * @returns Matching run records in map iteration order (empty when none match).
 */
export function listGroupRuns(groupId: string): SubagentRunRecord[] {
  return Array.from(subagentRuns.values()).filter((run) => run.groupId === groupId);
}
/** Register a new subagent run and start tracking its lifecycle. */
export function registerSubagentRun(params: RegisterSubagentRunParams): SubagentRunRecord {
const {
@ -102,6 +149,7 @@ export function registerSubagentRun(params: RegisterSubagentRunParams): Subagent
cleanup = "delete",
timeoutSeconds,
announce,
groupId,
start,
} = params;
@ -113,6 +161,7 @@ export function registerSubagentRun(params: RegisterSubagentRunParams): Subagent
label,
cleanup,
announce,
groupId,
createdAt: Date.now(),
};
@ -190,6 +239,7 @@ export function shutdownSubagentRegistry(): void {
/** Reset all state (for testing). */
export function resetSubagentRegistryForTests(): void {
subagentRuns.clear();
subagentGroups.clear();
resumedRequesters.clear();
stopSweeper();
}
@ -300,37 +350,59 @@ function captureFindings(record: SubagentRunRecord): void {
/**
* Phase 2: Announce completed-but-unannounced runs.
*
* Runs with announce="silent" are held back until ALL silent runs from the
* same requester have completed. All other runs (immediate / undefined) are
* announced per-completion as before.
* Three announcement paths:
* 1. Grouped runs: wait for all runs in the group to complete, then announce
* together with the group's `next` continuation prompt (if any).
* 2. Ungrouped silent runs: legacy behavior; wait for ALL silent runs from
* the same requester to complete, then announce together.
* 3. Ungrouped immediate runs: announce per-completion (default).
*/
function checkAndAnnounce(requesterSessionId: string): void {
const allRuns = listSubagentRuns(requesterSessionId);
// ── Immediate runs: announce per-completion (default behavior) ──
const immediateReady = allRuns.filter(
// ── 1. Grouped runs: announce by group when all members complete ──
const groupIds = new Set<string>();
for (const r of allRuns) {
if (r.groupId && !r.announced) groupIds.add(r.groupId);
}
for (const groupId of groupIds) {
const groupRuns = allRuns.filter(r => r.groupId === groupId);
const unannounced = groupRuns.filter(r => !r.announced);
const ready = unannounced.filter(r => r.endedAt !== undefined && r.findingsCaptured);
if (ready.length > 0 && ready.length === unannounced.length) {
const group = subagentGroups.get(groupId);
announceRuns(requesterSessionId, ready, group?.next);
}
}
// ── 2. Ungrouped runs: original immediate/silent logic ──
const ungrouped = allRuns.filter(r => !r.groupId);
// Immediate: announce per-completion
const immediateReady = ungrouped.filter(
r => !r.announced && r.endedAt !== undefined && r.findingsCaptured && r.announce !== "silent",
);
if (immediateReady.length > 0) {
announceGroup(requesterSessionId, immediateReady);
announceRuns(requesterSessionId, immediateReady);
}
// ── Silent runs: announce only when ALL silent runs are done ──
const silentRuns = allRuns.filter(r => r.announce === "silent");
// Silent: announce only when ALL ungrouped silent runs are done
const silentRuns = ungrouped.filter(r => r.announce === "silent");
const unannouncedSilent = silentRuns.filter(r => !r.announced);
const silentReady = unannouncedSilent.filter(
r => r.endedAt !== undefined && r.findingsCaptured,
);
// All unannounced silent runs must be ready (ended + findings captured)
if (silentReady.length > 0 && silentReady.length === unannouncedSilent.length) {
announceGroup(requesterSessionId, silentReady);
announceRuns(requesterSessionId, silentReady);
}
}
/** Announce a group of runs and mark them as announced. */
function announceGroup(requesterSessionId: string, runs: SubagentRunRecord[]): void {
const announced = runCoalescedAnnounceFlow(requesterSessionId, runs);
/** Announce a batch of completed runs and mark them as announced. */
function announceRuns(requesterSessionId: string, runs: SubagentRunRecord[], next?: string): void {
const announced = runCoalescedAnnounceFlow(requesterSessionId, runs, next);
if (announced) {
for (const r of runs) {
@ -415,9 +487,18 @@ function sweep(): void {
}
}
// Clean up groups whose runs have all been archived
for (const [groupId] of subagentGroups) {
const hasActiveRuns = [...subagentRuns.values()].some(r => r.groupId === groupId);
if (!hasActiveRuns) {
subagentGroups.delete(groupId);
removed++;
}
}
if (removed > 0) {
persist();
console.log(`[SubagentRegistry] Archived ${removed} completed run(s)`);
console.log(`[SubagentRegistry] Archived ${removed} completed run(s)/group(s)`);
}
if (subagentRuns.size === 0) {
@ -431,7 +512,7 @@ function sweep(): void {
function persist(): void {
try {
saveSubagentRuns(subagentRuns);
saveSubagentRuns(subagentRuns, subagentGroups);
} catch (err) {
console.error(`[SubagentRegistry] Failed to persist runs:`, err);
}

View file

@ -11,6 +11,26 @@ export type SubagentRunOutcome = {
error?: string | undefined;
};
/**
* A logical group of subagent runs that are tracked together.
* Groups enable "collect all, then act" workflows:
* all runs in a group must complete before the combined results
* (plus an optional `next` continuation) are announced to the parent.
*
* Runs join a group by carrying its `groupId`; the group record itself holds
* only the metadata below.
*/
export type SubagentGroup = {
/** Unique group identifier (UUIDv7) */
groupId: string;
/** Session ID of the parent (requester) agent */
requesterSessionId: string;
/** Optional human-readable label for the group */
label?: string | undefined;
/** Continuation prompt used after all runs in the group complete.
* It is forwarded to the announce flow and injected into the announcement,
* so the parent agent acts on the combined findings instead of only summarizing. */
next?: string | undefined;
/** Timestamp when the group was created (taken from `Date.now()`, i.e. epoch milliseconds) */
createdAt: number;
};
/** Persistent record tracking a single subagent run */
export type SubagentRunRecord = {
/** Unique run identifier (UUIDv7) */
@ -48,6 +68,9 @@ export type SubagentRunRecord = {
/** Announcement mode: "immediate" (default) announces per-completion,
* "silent" defers until all silent runs from the same requester complete. */
announce?: "immediate" | "silent" | undefined;
/** Group ID this run belongs to (if any). Runs in a group are announced
* together when all complete, regardless of the `announce` field. */
groupId?: string | undefined;
};
/** Parameters for registering a new subagent run */
@ -63,6 +86,12 @@ export type RegisterSubagentRunParams = {
start?: (() => void) | undefined;
/** Announcement mode: "immediate" (default) or "silent" (defer until all silent runs complete). */
announce?: "immediate" | "silent" | undefined;
/** Group ID to join. Runs in a group are announced together when all complete. */
groupId?: string | undefined;
/** Continuation prompt for the group. Only used on group creation (first spawn).
* After all runs in the group complete, this prompt is included in the announcement
* so the parent agent can act on the combined findings (e.g. summarize, write PDF). */
next?: string | undefined;
};
/** Parameters for the announce flow */

View file

@ -262,23 +262,47 @@ export function buildConditionalToolSections(
lines.push(
"## Sub-Agents",
"If a task is complex or long-running, spawn a sub-agent. It will do the work and report back when done.",
"IMPORTANT: After spawning sub-agents, do NOT immediately check on them with sessions_list. " +
"Results are delivered directly into your context automatically when the sub-agent finishes. " +
"Continue with other tasks or finish your turn and wait for the results to arrive.",
"You may use sessions_list to check on sub-agents only if a long time has passed or the user explicitly asks about their status.",
"Sub-agents cannot spawn nested sub-agents.",
"",
"### Critical Rules",
"- **NEVER fabricate, guess, or make up data that a sub-agent has not yet returned.** " +
"This includes completion status — do NOT claim tasks are done until you receive actual results.",
"- After spawning, do NOT proceed with work that depends on the sub-agent results. " +
"You can still chat with the user, do unrelated tasks, or explain what the sub-agents are working on.",
"- Sub-agents cannot spawn nested sub-agents.",
"- You can use `sessions_list` to check sub-agent status if needed.",
"",
"### Groups and Continuation (`next`) — ALWAYS use for multi-agent tasks",
"When spawning multiple sub-agents, **always** use `next` to define the follow-up work. " +
"This is the standard pattern — do NOT use bare `announce: \"silent\"` for multi-agent collect-then-act workflows.",
"",
"```",
"// First spawn — creates a group automatically, returns groupId",
'sessions_spawn({ task: "Get AAPL financials", next: "Summarize all data and write a PDF report", label: "AAPL" })',
"// → { groupId: \"grp-abc\", runId: \"...\" }",
"",
"// Subsequent spawns — join the same group",
'sessions_spawn({ task: "Get MSFT financials", groupId: "grp-abc", label: "MSFT" })',
'sessions_spawn({ task: "Get GOOG financials", groupId: "grp-abc", label: "GOOG" })',
"```",
"",
"The system waits for ALL runs in the group to complete, then delivers the combined findings " +
"plus the `next` continuation prompt back to you. You can then use tools (write files, call APIs, etc.) " +
"to complete the follow-up work. The user is NOT blocked during this process — they can keep chatting.",
"",
"Use `next` whenever the user's request involves: collect data → then act on it (summarize, analyze, generate files).",
"Without `next`, findings are summarized but no further action is taken.",
"",
"### Announce Modes (when not using groups)",
"- `announce: \"immediate\"` (default): findings delivered per sub-agent as each completes.",
"- `announce: \"silent\"`: all findings held until every silent sub-agent finishes, then delivered together.",
"Groups always use silent collection internally — you don't need to set announce when using groupId.",
"",
"### Timeout Guidelines",
"Set timeoutSeconds generously — a sub-agent that times out loses all its work.",
"- Simple tasks (search, read, summarize): 600 (10 min, the default)",
"- Moderate tasks (multi-step research, file downloads + analysis): 900–1200 (15–20 min)",
"- Complex tasks (code generation, PDF creation, multi-file operations): 1200–1800 (20–30 min)",
"When in doubt, use a longer timeout. It is always better to wait longer than to lose completed work.",
"",
"### Announce Modes",
"- `announce: \"immediate\"` (default): Each sub-agent's findings are delivered to you as soon as it completes.",
"- `announce: \"silent\"`: All findings are held back until every silent sub-agent finishes, then delivered as ONE combined report.",
"Use \"silent\" when you want to collect data from multiple sub-agents first, then summarize everything at once.",
"When in doubt, use a longer timeout.",
"",
);
}

View file

@ -7,7 +7,7 @@
import { Type } from "@sinclair/typebox";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { listSubagentRuns, getSubagentRun } from "../subagent/registry.js";
import { listSubagentRuns, getSubagentRun, getSubagentGroup } from "../subagent/registry.js";
import type { SubagentRunRecord } from "../subagent/types.js";
const SessionsListSchema = Type.Object({
@ -79,6 +79,11 @@ function formatRunDetail(record: SubagentRunRecord, now: number): string {
];
if (record.label) lines.push(`Label: ${record.label}`);
if (record.groupId) {
const group = getSubagentGroup(record.groupId);
lines.push(`Group: ${record.groupId}${group?.label ? ` (${group.label})` : ""}`);
if (group?.next) lines.push(`Continuation: ${group.next.slice(0, 120)}${group.next.length > 120 ? "…" : ""}`);
}
lines.push(`Task: ${record.task}`);
lines.push(`Status: ${status}${record.outcome?.error ? `${record.outcome.error}` : ""}`);
lines.push(`Child Session: ${record.childSessionId}`);
@ -128,8 +133,7 @@ export function createSessionsListTool(
description:
"List all subagent runs spawned by this session and their current status. " +
"Optionally pass a runId to get detailed information about a specific run. " +
"NOTE: Do NOT call this immediately after spawning subagents — results arrive automatically in your context when subagents complete. " +
"Only use this if a long time has passed or the user explicitly asks about subagent status.",
"Use this to check subagent progress or when the user asks about status.",
parameters: SessionsListSchema,
execute: async (_toolCallId, args) => {
const { runId } = args as SessionsListArgs;
@ -177,21 +181,59 @@ export function createSessionsListTool(
const someRunning = runs.some((r) => !r.endedAt);
// Build status lines for each run
// Build status lines, grouping runs by groupId
const statusLines: string[] = [];
for (let i = 0; i < runs.length; i++) {
const r = runs[i]!;
const groupedRuns = new Map<string, SubagentRunRecord[]>();
const ungroupedRuns: SubagentRunRecord[] = [];
for (const r of runs) {
if (r.groupId) {
const list = groupedRuns.get(r.groupId) ?? [];
list.push(r);
groupedRuns.set(r.groupId, list);
} else {
ungroupedRuns.push(r);
}
}
let idx = 0;
// Grouped runs
for (const [gId, gRuns] of groupedRuns) {
const group = getSubagentGroup(gId);
const groupLabel = group?.label || `Group ${gId.slice(0, 8)}`;
const done = gRuns.filter(r => r.endedAt).length;
const nextSnippet = group?.next ? ` → next: "${group.next.slice(0, 60)}${group.next.length > 60 ? "…" : ""}"` : "";
statusLines.push(`\n 📦 ${groupLabel} (${done}/${gRuns.length} done${nextSnippet})`);
for (const r of gRuns) {
idx++;
const displayName = r.label || r.task.slice(0, 60);
const status = resolveStatus(r);
if (status === "running") {
const elapsed = r.startedAt ? formatElapsed(now - r.startedAt) : "just spawned";
statusLines.push(` ${idx}. [RUNNING] "${displayName}" (${elapsed})`);
} else {
const elapsed = r.startedAt && r.endedAt ? formatElapsed(r.endedAt - r.startedAt) : "";
statusLines.push(` ${idx}. [${status.toUpperCase()}] "${displayName}" (${elapsed})`);
}
}
}
// Ungrouped runs
for (const r of ungroupedRuns) {
idx++;
const displayName = r.label || r.task.slice(0, 60);
const status = resolveStatus(r);
if (status === "running") {
const elapsed = r.startedAt ? formatElapsed(now - r.startedAt) : "just spawned";
statusLines.push(` ${i + 1}. [RUNNING] "${displayName}" (${elapsed})`);
statusLines.push(` ${idx}. [RUNNING] "${displayName}" (${elapsed})`);
} else {
const elapsed = r.startedAt && r.endedAt ? formatElapsed(r.endedAt - r.startedAt) : "";
const findings = r.findingsCaptured
? (r.findings ? r.findings.slice(0, 200) + (r.findings.length > 200 ? "…" : "") : "(no output)")
: "(findings not yet captured)";
statusLines.push(` ${i + 1}. [${status.toUpperCase()}] "${displayName}" (${elapsed})\n Findings: ${findings}`);
statusLines.push(` ${idx}. [${status.toUpperCase()}] "${displayName}" (${elapsed})\n Findings: ${findings}`);
}
}

View file

@ -10,7 +10,7 @@ import { Type } from "@sinclair/typebox";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import { getHub } from "../../hub/hub-singleton.js";
import { buildSubagentSystemPrompt } from "../subagent/announce.js";
import { registerSubagentRun } from "../subagent/registry.js";
import { registerSubagentRun, createSubagentGroup, getSubagentGroup } from "../subagent/registry.js";
import { resolveTools } from "../tools.js";
const SessionsSpawnSchema = Type.Object({
@ -41,7 +41,26 @@ const SessionsSpawnSchema = Type.Object({
"Announcement mode. 'immediate' (default): findings delivered as each subagent completes. " +
"'silent': defer all announcements until every silent subagent from this session finishes, " +
"then deliver one combined report. Use 'silent' when spawning multiple subagents to collect " +
"data in parallel and you want to summarize everything at once.",
"data in parallel and you want to summarize everything at once. " +
"Ignored when groupId is provided (groups always collect all results before announcing).",
}),
),
groupId: Type.Optional(
Type.String({
description:
"Join an existing group. Pass the groupId returned by a previous sessions_spawn call " +
"to add this subagent to the same group. All runs in a group are announced together " +
"when the last one completes. If omitted AND 'next' is provided, a new group is created automatically.",
}),
),
next: Type.Optional(
Type.String({
description:
"Continuation task to execute after ALL subagents in the group complete. " +
"Only used when creating a new group (first spawn without groupId). " +
"When set, the combined findings from all subagents plus this 'next' prompt " +
"are delivered to you so you can perform follow-up work (e.g. summarize, generate reports, write files). " +
"Setting 'next' automatically creates a group and implies silent collection.",
}),
),
});
@ -53,12 +72,15 @@ type SessionsSpawnArgs = {
cleanup?: "delete" | "keep";
timeoutSeconds?: number;
announce?: "immediate" | "silent";
groupId?: string;
next?: string;
};
/**
 * Structured `details` payload returned by the sessions_spawn tool alongside
 * its human-readable text content.
 */
export type SessionsSpawnResult = {
/** "accepted" once the run has been registered; "error" when the spawn was rejected or failed. */
status: "accepted" | "error";
/** Session ID allocated for the spawned child agent (set on success). */
childSessionId?: string;
/** Identifier of the registered subagent run (set on success). */
runId?: string;
/** Group the run belongs to, whether joined via the `groupId` argument or
 * created automatically because `next` was supplied. Pass it to later spawns
 * to add more runs to the same group. */
groupId?: string;
/** Failure description; only meaningful when `status` is "error". */
error?: string;
};
@ -79,13 +101,15 @@ export function createSessionsSpawnTool(
label: "Spawn Subagent",
description:
"Spawn a background subagent to handle a specific task. The subagent runs in an isolated session with its own tool set. " +
"When it completes, its findings are delivered directly into your context automatically — you do NOT need to poll or check. " +
"IMPORTANT: After spawning subagents, continue with any other immediate tasks you have, or simply finish your turn and wait. " +
"Do NOT call sessions_list to check on subagents you just spawned — results take time and will arrive on their own. " +
"When it completes, its findings are delivered directly into your context automatically. " +
"After spawning, do NOT proceed with work that depends on the results — but you can still chat or do unrelated tasks. " +
"When spawning multiple subagents for a collect-then-act workflow, ALWAYS use the `next` parameter " +
"on the first spawn to define follow-up work, then pass the returned groupId to subsequent spawns. " +
"Use this for parallelizable work, long-running analysis, or tasks that benefit from isolation.",
parameters: SessionsSpawnSchema,
execute: async (_toolCallId, args) => {
const { task, label, model, cleanup = "delete", timeoutSeconds, announce } = args as SessionsSpawnArgs;
const { task, label, model, cleanup = "delete", timeoutSeconds, announce, next } = args as SessionsSpawnArgs;
let { groupId } = args as SessionsSpawnArgs;
// Guard: subagents cannot spawn subagents
if (options.isSubagent) {
@ -102,6 +126,28 @@ export function createSessionsSpawnTool(
const runId = uuidv7();
const childSessionId = uuidv7();
// Validate groupId if provided
if (groupId) {
const existingGroup = getSubagentGroup(groupId);
if (!existingGroup) {
return {
content: [{ type: "text", text: `Error: group not found: ${groupId}. Use the groupId returned by a previous sessions_spawn call.` }],
details: { status: "error", error: `group not found: ${groupId}` },
};
}
}
// Auto-create group when `next` is provided without an existing groupId
if (!groupId && next) {
groupId = uuidv7();
createSubagentGroup({
groupId,
requesterSessionId,
label: label ? `Group: ${label}` : undefined,
next,
});
}
// Resolve tools for the subagent (with isSubagent=true for policy filtering)
const subagentTools = resolveTools({ isSubagent: true });
const toolNames = subagentTools.map((t) => t.name);
@ -135,21 +181,27 @@ export function createSessionsSpawnTool(
label,
cleanup,
timeoutSeconds,
announce,
announce: groupId ? "silent" : announce,
groupId,
start: () => childAgent.write(task),
});
// Build response text
const groupInfo = groupId ? `\nGroup: ${groupId}` : "";
const nextInfo = next ? `\nContinuation: "${next.slice(0, 100)}${next.length > 100 ? "…" : ""}"` : "";
const responseText =
`Subagent spawned: ${label || task.slice(0, 80)}\n` +
`Run: ${runId}${groupInfo}${nextInfo}\n\n` +
`⏳ WAITING FOR RESULTS — do NOT proceed with work that depends on these results.\n` +
`Do NOT fabricate data or completion status. Results will arrive in your context automatically.`;
return {
content: [
{
type: "text",
text: `Subagent spawned successfully.\n\nRun ID: ${runId}\nSession: ${childSessionId}\nTask: ${label || task.slice(0, 80)}\n\nThe subagent is now working in the background. Its findings will be delivered directly into your context when it completes — do NOT poll or call sessions_list for it. Continue with other tasks or finish your turn.`,
},
],
content: [{ type: "text", text: responseText }],
details: {
status: "accepted",
childSessionId,
runId,
groupId,
},
};
} catch (err) {

View file

@ -9,13 +9,22 @@ import {
type AgentMessageItem,
type ExecApprovalRequestPayload,
type ApprovalDecision,
type CompactionEndEvent,
} from "@multica/sdk";
export type ToolStatus = "running" | "success" | "error" | "interrupted";
/**
 * Statistics describing one context-compaction pass.
 * Attached to a `role: "system"` message with `systemType: "compaction"` so
 * the UI can show that earlier conversation history was trimmed.
 * Field values are copied verbatim from the `compaction_end` event.
 */
export interface CompactionInfo {
  /** Count of items removed by the compaction (presumably messages — confirm against the event producer). */
removed: number;
  /** Count of items retained after the compaction (presumably messages — confirm). */
kept: number;
  /** Tokens removed, when the event reports it. */
tokensRemoved?: number;
  /** Tokens retained, when the event reports it. */
tokensKept?: number;
  /** Reason/strategy reported by the event (e.g. "summary"); other values are not enumerated here. */
reason: string;
}
export interface Message {
id: string;
role: "user" | "assistant" | "toolResult";
role: "user" | "assistant" | "toolResult" | "system";
content: ContentBlock[];
agentId: string;
stopReason?: string;
@ -24,6 +33,8 @@ export interface Message {
toolArgs?: Record<string, unknown>;
toolStatus?: ToolStatus;
isError?: boolean;
systemType?: "compaction";
compaction?: CompactionInfo;
}
export interface ChatError {
@ -215,6 +226,27 @@ export function useChat() {
}
case "tool_execution_update":
break;
case "compaction_end": {
const ce = event as CompactionEndEvent;
setMessages((prev) => [
...prev,
{
id: uuidv7(),
role: "system",
content: [],
agentId: payload.agentId,
systemType: "compaction",
compaction: {
removed: ce.removed,
kept: ce.kept,
tokensRemoved: ce.tokensRemoved,
tokensKept: ce.tokensKept,
reason: ce.reason,
},
},
]);
break;
}
}
}, []);

View file

@ -2,9 +2,17 @@ import type { ContentBlock } from "@multica/sdk"
export type ToolStatus = "running" | "success" | "error" | "interrupted"
/**
 * Statistics describing one context-compaction pass, carried on a
 * `Message` via its optional `compaction` field.
 */
export interface CompactionInfo {
  /** Count of items removed by the compaction (presumably messages — confirm against the event producer). */
removed: number
  /** Count of items retained after the compaction (presumably messages — confirm). */
kept: number
  /** Tokens removed, when known. */
tokensRemoved?: number
  /** Tokens retained, when known. */
tokensKept?: number
  /** Reason/strategy for the compaction (e.g. "summary"); the full set of values is not defined here. */
reason: string
}
export interface Message {
id: string
role: "user" | "assistant" | "toolResult"
role: "user" | "assistant" | "toolResult" | "system"
content: ContentBlock[]
agentId: string
stopReason?: string
@ -13,4 +21,6 @@ export interface Message {
toolArgs?: Record<string, unknown>
toolStatus?: ToolStatus
isError?: boolean
systemType?: "compaction"
compaction?: CompactionInfo
}

View file

@ -0,0 +1,45 @@
"use client"
import { memo } from "react"
import { Scissors } from "lucide-react"
import type { Message } from "@multica/store"
/**
 * Render a token count compactly for the compaction notice.
 *
 * Counts of 1000 or more are shown as approximate thousands with one
 * decimal place (e.g. `~1.5k`); smaller counts are shown as-is.
 *
 * @param n - Token count to format.
 * @returns The display string.
 */
function formatTokens(n: number): string {
  const isLarge = n >= 1000
  return isLarge ? "~" + (n / 1000).toFixed(1) + "k" : String(n)
}
/** Props for the compaction notice row. */
interface CompactionItemProps {
  /** Message whose `compaction` payload is displayed; if the payload is missing nothing is rendered. */
message: Message
}
/**
 * Single-row notice shown in the message list when the conversation context
 * was compacted.
 *
 * Reads `message.compaction` and renders a label ("Context summarized" when
 * the reason is "summary", otherwise "Context compacted") plus how many
 * messages were removed and, when available, how many tokens were freed.
 * Renders nothing if the message carries no compaction payload.
 */
export const CompactionItem = memo(function CompactionItem({ message }: CompactionItemProps) {
  const info = message.compaction
  if (!info) return null

  const label = info.reason === "summary" ? "Context summarized" : "Context compacted"
  // Pick singular/plural so a one-message compaction doesn't read "1 messages removed".
  const noun = info.removed === 1 ? "message" : "messages"
  const removed = `${info.removed} ${noun} removed`
  // Token stats are optional on the payload; omit the clause entirely when absent.
  const tokens = info.tokensRemoved != null
    ? `, ${formatTokens(info.tokensRemoved)} tokens freed`
    : ""

  return (
    <div className="py-0.5 px-2.5 text-sm text-muted-foreground">
      <div className="flex items-center gap-1.5 px-2.5 py-1">
        {/* Status dot */}
        <span className="size-1.5 rounded-full shrink-0 bg-muted-foreground/40" />
        {/* Icon */}
        <Scissors className="size-3.5 shrink-0" />
        {/* Label */}
        <span className="font-medium shrink-0">{label}</span>
        {/* Stats */}
        <span className="ml-auto text-xs text-muted-foreground/60 shrink-0">
          {removed}{tokens}
        </span>
      </div>
    </div>
  )
})

View file

@ -5,6 +5,7 @@ import { MemoizedMarkdown } from "@multica/ui/components/markdown";
import { StreamingMarkdown } from "@multica/ui/components/markdown/StreamingMarkdown";
import { ToolCallItem } from "@multica/ui/components/tool-call-item";
import { ThinkingItem } from "@multica/ui/components/thinking-item";
import { CompactionItem } from "@multica/ui/components/compaction-item";
import { cn, getTextContent } from "@multica/ui/lib/utils";
import type { Message } from "@multica/store";
import type { ContentBlock, ToolCall, ThinkingContent } from "@multica/sdk";
@ -78,6 +79,11 @@ export const MessageList = memo(function MessageList({ messages, streamingIds }:
return (
<div className="relative p-6 px-4 sm:px-10 max-w-4xl mx-auto">
{messages.map((msg) => {
// System messages (e.g. compaction notifications)
if (msg.role === "system") {
return <CompactionItem key={msg.id} message={msg} />
}
// ToolResult messages → render as tool execution item
if (msg.role === "toolResult") {
return <ToolCallItem key={msg.id} message={msg} />

View file

@ -0,0 +1,463 @@
---
name: Earnings Analysis
description: >-
Analyze a company's financial statements (income statement, balance sheet,
cash flow statement) to assess financial health, earnings quality, and
competitive advantage. Use when the user asks to read/analyze financial
statements, check earnings quality, assess financial health, evaluate
profitability trends, or screen for competitive moats.
version: 1.0.0
metadata:
emoji: "\U0001F4D1"
requires:
env:
- FINANCIAL_DATASETS_API_KEY
tags:
- finance
- earnings
- analysis
- statements
- buffett
userInvocable: true
disableModelInvocation: false
---
## Instructions
You are performing a structured financial statement analysis. Follow all steps in order and show your work. Output language must match the user's input language.
**IMPORTANT: This analysis requires BOTH structured data AND external context.** You MUST use `web_search` to gather earnings call insights, industry context, and explanations for data anomalies. An analysis based only on API data without any web research is incomplete. Expect to make 3-6 web searches throughout the analysis.
### Progress Checklist
```
Earnings Analysis Progress:
- [ ] Step 1: Gather financial data
- [ ] Step 2: Income statement analysis
- [ ] Step 3: Balance sheet analysis
- [ ] Step 4: Cash flow statement analysis
- [ ] Step 5: Buffett competitive advantage scoring
- [ ] Step 6: Quality of earnings assessment
- [ ] Step 7: SEC filing qualitative analysis
- [ ] Step 8: Peer comparison (if requested)
- [ ] Step 9: Present findings
```
### Step 1: Gather Financial Data
Use `data` tool with `domain="finance"` for all structured data calls.
#### 1a. Structured Data
1. **Annual financial statements** (5 years):
```
action: "get_all_financial_statements"
params: { ticker: "[TICKER]", period: "annual", limit: 5 }
```
This returns income statements, balance sheets, and cash flow statements together.
2. **Quarterly financial statements** (last 4 quarters):
```
action: "get_all_financial_statements"
params: { ticker: "[TICKER]", period: "quarterly", limit: 4 }
```
3. **Current financial metrics**:
```
action: "get_financial_metrics_snapshot"
params: { ticker: "[TICKER]" }
```
4. **Company facts**:
```
action: "get_company_facts"
params: { ticker: "[TICKER]" }
```
Extract: `sector`, `industry` — needed for benchmark comparisons in later steps.
5. **Current stock price**:
```
action: "get_price_snapshot"
params: { ticker: "[TICKER]" }
```
6. **Recent news**:
```
action: "get_news"
params: { ticker: "[TICKER]", limit: 10 }
```
Scan headlines for material events (earnings surprises, guidance changes, M&A, restructuring).
#### 1b. External Context (Web Search) — MANDATORY
You MUST run the following two web searches after gathering structured data. These are not optional.
1. **Latest earnings call highlights** (REQUIRED):
```
web_search("[COMPANY] latest earnings call highlights key takeaways [CURRENT_YEAR]")
```
Extract: management guidance, segment commentary, strategic priorities, forward outlook.
This provides the "why" behind the numbers that structured data cannot explain.
2. **Industry/macro backdrop** (REQUIRED):
```
web_search("[INDUSTRY] industry outlook trends [CURRENT_YEAR]")
```
Extract: industry growth rate, tailwinds/headwinds, regulatory changes, competitive dynamics.
This is needed to assess whether the company's performance is company-specific or industry-wide.
3. **Company-specific events** (conditional — run if news headlines or data show a material event):
```
web_search("[COMPANY] [EVENT_KEYWORD] impact analysis")
```
Examples: acquisition, restructuring, product launch, lawsuit, management change.
**Checkpoint:** Before proceeding to Step 2, verify that you have completed at least 2 web searches above. If you have not, go back and run them now.
### Step 2: Income Statement Analysis
Analyze the income statement across all 5 annual periods. Calculate and present:
1. **Revenue trend**:
- Year-over-year growth rate for each year
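- 5-year CAGR: `(Revenue_latest / Revenue_earliest)^(1/years) - 1`, where `years` is the number of annual intervals between the earliest and latest period (4 when using five annual statements, not 5)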
- Flag any years with revenue decline
2. **Margin analysis** (calculate for each year, show the trend):
- Gross Margin = Gross Profit / Revenue
- Operating Margin = Operating Income / Revenue
- Net Margin = Net Income / Revenue
3. **Margin benchmarks** (from [financial-ratios-benchmarks.md](references/financial-ratios-benchmarks.md)):
- Compare each margin to sector benchmarks
- Flag margins that are significantly above or below sector range
4. **EPS analysis**:
- EPS trend over 5 years
- EPS growth consistency (note any years of decline)
5. **Expense structure**:
- Cost of revenue as % of revenue (trend)
- SG&A as % of revenue (trend)
- R&D as % of revenue (trend, if applicable)
- Flag any expense category growing faster than revenue
6. **Contextual explanation** (REQUIRED — use web search results from Step 1b):
- For each significant trend or inflection point in the data above, provide a **why** explanation using the earnings call and industry context gathered in Step 1b.
- If revenue growth changed direction significantly (acceleration or deceleration > 10pp), run an additional search:
`web_search("[COMPANY] revenue [growth/decline] reason [YEAR]")`
- If margins shifted by more than 5pp year-over-year, run an additional search:
`web_search("[COMPANY] margin [expansion/compression] [YEAR]")`
- **Do not present a data table without narrative.** Every major trend must have a "why" attached, citing the source (earnings call, industry report, or company announcement).
Present as a table:
| Metric | Year 1 | Year 2 | Year 3 | Year 4 | Year 5 | 5Y CAGR |
|--------|--------|--------|--------|--------|--------|---------|
### Step 3: Balance Sheet Analysis
Analyze the balance sheet across all 5 annual periods:
1. **Liquidity**:
- Current Ratio = Current Assets / Current Liabilities
- Quick Ratio = (Current Assets - Inventory) / Current Liabilities
- Cash and equivalents trend
2. **Leverage**:
- Cash vs. Total Debt (short-term + long-term debt)
- Debt-to-Equity = Total Liabilities / Total Shareholders' Equity
- Interest Coverage = Operating Income / Interest Expense
- Debt payoff capacity = Total Debt / Net Income (in years)
3. **Asset quality**:
- Receivables Turnover = Revenue / Accounts Receivable
- Inventory Turnover = Cost of Revenue / Inventory (if applicable)
- Goodwill as % of Total Assets (flag if > 30%)
4. **Equity structure**:
- Retained earnings: year-over-year changes (growing?)
- Preferred stock: present or absent?
- Treasury stock: present? growing? (indicates buybacks)
5. **Working capital trend**:
- Net Working Capital = Current Assets - Current Liabilities
- Direction of change over 5 years
6. **Contextual explanation** (use web search results from Step 1b + additional searches as needed):
- Explain major balance sheet changes using earnings call context from Step 1b.
- If total debt changed significantly (> 30% YoY), you MUST search for the reason:
`web_search("[COMPANY] debt [issuance/repayment] [YEAR]")`
- If goodwill jumped, you MUST search for acquisition context:
`web_search("[COMPANY] acquisition [YEAR]")`
- Large treasury stock changes → confirm buyback program details:
`web_search("[COMPANY] share buyback program")`
Compare key ratios to sector benchmarks from [financial-ratios-benchmarks.md](references/financial-ratios-benchmarks.md).
### Step 4: Cash Flow Statement Analysis
Analyze cash flow statements across all 5 annual periods:
1. **Operating cash flow quality**:
- OCF vs. Net Income ratio for each year
- Target: OCF/NI > 1.0 (cash earnings exceed accrual earnings)
- Trend direction
2. **Free cash flow**:
- FCF = Operating Cash Flow - Capital Expenditure
- FCF Margin = FCF / Revenue
- 5-year FCF trend and CAGR
3. **Capital intensity**:
- CapEx / Revenue ratio
- CapEx / Net Income ratio (Buffett benchmark: < 25% excellent, < 50% acceptable)
- Is CapEx growing faster than revenue? (potential red flag)
4. **Cash flow composition**:
- Net cash from operating activities (should be consistently positive)
- Net cash from investing activities (negative = investing in growth)
- Net cash from financing activities (pattern: debt vs. equity funded?)
5. **Shareholder returns**:
- Dividends paid (from financing activities)
- Share buybacks / treasury stock repurchase
- Total payout ratio = (Dividends + Buybacks) / Net Income
- Is the company returning cash while maintaining growth?
6. **Contextual explanation** (use web search results from Step 1b + additional searches as needed):
- Explain cash flow patterns using earnings call context from Step 1b.
- If CapEx spiked significantly in a particular year, you MUST search for what was built:
`web_search("[COMPANY] capital expenditure investment [YEAR]")`
- If FCF diverged sharply from net income, search for restructuring or working capital events.
Present a summary table:
| Metric | Year 1 | Year 2 | Year 3 | Year 4 | Year 5 |
|--------|--------|--------|--------|--------|--------|
### Step 5: Buffett Competitive Advantage Scoring
Apply the scoring framework from [buffett-checklist.md](references/buffett-checklist.md).
For each of the 13 criteria across 4 categories:
1. Calculate the metric value from the data gathered in Steps 1-4
2. Determine the score based on the threshold table
3. Note the sector-specific caveats (Financials, Utilities, REITs, Growth-stage)
Present the full scorecard table and the overall rating (Excellent / Good / Average / Weak).
### Step 6: Quality of Earnings Assessment
Assess whether reported earnings are backed by real cash and sustainable operations:
1. **Accrual ratio**:
- Formula: (Net Income - Operating Cash Flow) / Total Assets
- Interpretation: Lower is better. High positive values suggest earnings are driven by accruals rather than cash.
- Red flag threshold: > 10%
2. **Revenue recognition quality**:
- Compare Accounts Receivable growth rate vs. Revenue growth rate
- If AR grows significantly faster than revenue → potential aggressive revenue recognition
- Red flag threshold: AR growth > Revenue growth + 5 percentage points
3. **Inventory quality** (if applicable):
- Compare Inventory growth rate vs. Cost of Revenue growth rate
- Rising inventory vs. flat/declining COGS → potential obsolescence risk
- Red flag threshold: Inventory growth > COGS growth + 10 percentage points
4. **One-time items**:
- Identify significant non-recurring charges or gains in the income statement
- Calculate adjusted net income excluding one-time items
- Compare adjusted vs. reported margins
5. **Deferred revenue trend** (if applicable):
- Growing deferred revenue is a positive signal (future revenue already contracted)
- Declining deferred revenue may signal weakening demand pipeline
6. **External validation** (web search):
- If any red flags were triggered above, search for corroborating or mitigating context:
`web_search("[COMPANY] accounting concerns OR restatement OR SEC inquiry")`
- Check for auditor changes (can signal accounting issues):
`web_search("[COMPANY] auditor change OR audit opinion")`
- Only run these searches if quantitative red flags exist. Do not search proactively for every company.
Summarize quality of earnings as: **High** / **Moderate** / **Low** with supporting evidence.
### Step 7: SEC Filing Qualitative Analysis
Pull and analyze the most recent annual or quarterly filing:
1. **Get filing list**:
```
action: "get_filings"
params: { ticker: "[TICKER]", filing_type: "10-K", limit: 1 }
```
If 10-K is not recent enough, also pull 10-Q:
```
action: "get_filings"
params: { ticker: "[TICKER]", filing_type: "10-Q", limit: 1 }
```
2. **Read MD&A section** (Management's Discussion and Analysis):
```
action: "get_filing_items"
params: { ticker: "[TICKER]", filing_type: "10-K", item: "7" }
```
For 10-Q, MD&A is item "2":
```
action: "get_filing_items"
params: { ticker: "[TICKER]", filing_type: "10-Q", item: "2" }
```
3. **Read Risk Factors**:
```
action: "get_filing_items"
params: { ticker: "[TICKER]", filing_type: "10-K", item: "1A" }
```
4. **Extract and analyze**:
- Management's explanation of revenue and margin trends
- Forward-looking statements and guidance
- Key risk factors that could impact financial health
- Any disclosures about accounting policy changes
- Cross-validate: Does management narrative align with the quantitative data from Steps 2-4?
- Flag contradictions between management tone and actual numbers
5. **Supplement with earnings call transcript** (REQUIRED — web search/fetch):
You MUST search for and incorporate the most recent earnings call. This is critical for understanding management's forward-looking view.
- Search for the transcript:
`web_search("[COMPANY] [QUARTER] [YEAR] earnings call transcript")`
- If a transcript URL is found, use `web_fetch` to read key sections (CEO/CFO prepared remarks, Q&A highlights).
- Extract: forward guidance, segment-level commentary, management tone on competitive position, key analyst concerns.
- Cross-reference earnings call statements with MD&A disclosures — flag any inconsistencies.
6. **Summarize key insights**:
- What management says about the business trajectory
- Material risks not visible in the numbers alone
- Any changes in risk factors vs. prior filings (if noticeable)
- Key analyst questions and management responses from earnings call (if available)
### Step 8: Peer Comparison (Conditional)
**Execute this step only when the user explicitly requests peer comparison or industry benchmarking.**
1. **Identify peers**:
- Use the `sector` and `industry` from `get_company_facts`
- Select 2-3 publicly traded competitors in the same industry
- If the user specifies peers, use those instead
2. **Pull peer data** (for each peer):
```
action: "get_financial_metrics_snapshot"
params: { ticker: "[PEER_TICKER]" }
```
```
action: "get_income_statements"
params: { ticker: "[PEER_TICKER]", period: "annual", limit: 1 }
```
```
action: "get_balance_sheets"
params: { ticker: "[PEER_TICKER]", period: "annual", limit: 1 }
```
3. **Comparative table**:
| Metric | [TARGET] | [PEER 1] | [PEER 2] | [PEER 3] | Sector Avg |
|--------|----------|----------|----------|----------|------------|
| Revenue Growth (YoY) | | | | | |
| Gross Margin | | | | | |
| Net Margin | | | | | |
| ROE | | | | | |
| D/E Ratio | | | | | |
| FCF Margin | | | | | |
| P/E Ratio | | | | | |
4. **Competitive position assessment**:
- Where does the target company rank among peers on each metric?
- Identify clear advantages and disadvantages relative to peers
- Note if the target trades at a premium or discount to peers and whether it's justified
### Step 9: Present Findings
Compile the full analysis into a structured report. Follow this exact structure:
#### 1. Executive Summary
- Company name, ticker, sector, current price
- One-paragraph thesis: Is this a financially healthy company with a durable competitive advantage?
- Financial health rating from Buffett scorecard (Excellent / Good / Average / Weak)
- Earnings quality assessment (High / Moderate / Low)
#### 2. Financial Health Scorecard
- Full Buffett checklist scorecard table from Step 5
- Total score and rating
#### 3. Trend Dashboard
- 5-year key metrics trend table from Steps 2-4:
| Metric | Y1 | Y2 | Y3 | Y4 | Y5 | Trend |
|--------|----|----|----|----|----|----|
| Revenue | | | | | | arrow |
| Gross Margin | | | | | | arrow |
| Net Margin | | | | | | arrow |
| ROE | | | | | | arrow |
| D/E Ratio | | | | | | arrow |
| FCF | | | | | | arrow |
| OCF/NI | | | | | | arrow |
| CapEx/NI | | | | | | arrow |
Use directional indicators in the Trend column in place of the `arrow` placeholder: ↑ (rising), ↓ (falling), → (flat).
#### 4. Quality of Earnings
- Summary from Step 6 with key metrics and assessment
#### 5. Key Strengths & Red Flags
- **Strengths**: List 3-5 financial strengths with supporting data
- **Red Flags**: List any warning signs discovered during analysis. If none, state "No material red flags identified."
Common red flags to watch for:
- Revenue growth but declining margins
- Net income growing but OCF declining
- AR growing faster than revenue
- Inventory building up vs. flat COGS
- Rising debt with declining interest coverage
- Retained earnings declining
- Large goodwill relative to total assets
- CapEx consistently > 50% of net income
- Management tone in MD&A contradicts financial data
#### 6. SEC Filing Insights
- Key findings from Step 7
- Management's outlook and material risks
#### 7. Peer Comparison (if Step 8 was executed)
- Comparative table and competitive position assessment
### Guardrails
- Always state the date range of financial data used.
- If any data is missing or unavailable, explicitly note it and adjust the analysis scope.
- Do not present calculated ratios as precise — round to one decimal place.
- Clearly distinguish between facts (from data) and interpretive conclusions.
- The Buffett scorecard is a screening framework, not a buy/sell recommendation. State this in the output.
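- For non-US companies or companies not filing with the SEC, skip the SEC filing parts of Step 7 (filing retrieval, MD&A, and Risk Factors) and note the limitation. The earnings call transcript search in Step 7 still applies.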
- Output language must match the user's input language (Chinese input → Chinese output, English input → English output).
### Web Search Requirements
**Minimum mandatory searches (you MUST perform these):**
1. Earnings call highlights (Step 1b) — for management's own explanation of results
2. Industry outlook (Step 1b) — for macro/sector context
3. Earnings call transcript (Step 7) — for forward guidance and analyst Q&A
**Additional searches (trigger when data shows anomalies):**
- Revenue or margin inflection points (Steps 2-4)
- Major debt changes or acquisitions (Step 3)
- CapEx spikes (Step 4)
- Quality-of-earnings red flags (Step 6)
**Search principles:**
- **Source quality**: Prefer primary sources (SEC filings, company press releases, earnings call transcripts) over secondary sources (analyst blogs, news aggregators).
- **Cite with dates**: Always include source name and date when referencing external information.
- **Separate fact from opinion**: Label analyst or media commentary as external opinion, not fact.
- **Total budget**: Expect 3-8 web searches per analysis. Fewer than 3 means you are likely missing critical context.

View file

@ -0,0 +1,99 @@
# Buffett Competitive Advantage Checklist
Score each criterion and calculate a total. Use this to assess whether a company has a durable competitive advantage (economic moat).
## Scoring System
Total: 100 points across 4 categories (25 points each).
### Category 1: Profitability (25 points)
| # | Criterion | Excellent | Good | Weak |
|---|-----------|-----------|------|------|
| 1 | **Gross Margin** | > 40% → **10 pts** | 30-40% → **6 pts** | < 30% → **2 pts** |
| 2 | **Net Margin** | > 20% → **10 pts** | 10-20% → **6 pts** | < 10% → **2 pts** |
| 3 | **Return on Equity (ROE)** | > 15% → **5 pts** | 10-15% → **3 pts** | < 10% → **1 pt** |
How to calculate:
- Gross Margin = Gross Profit / Revenue
- Net Margin = Net Income / Revenue
- ROE = Net Income / Total Shareholders' Equity
- Use the most recent annual figures; cross-check with 5-year average
### Category 2: Balance Sheet Health (25 points)
| # | Criterion | Pass | Partial | Fail |
|---|-----------|------|---------|------|
| 4 | **Cash > Total Debt** | Yes → **8 pts** | Cash > 50% of Debt → **4 pts** | Cash < 50% of Debt → **1 pt** |
| 5 | **Debt-to-Equity Ratio** | < 0.8 → **7 pts** | 0.8-1.5 → **4 pts** | > 1.5 → **1 pt** |
| 6 | **No Preferred Stock** | None → **5 pts** | — | Has Preferred → **0 pts** |
| 7 | **Retained Earnings Growth** | Growing 5 consecutive years → **5 pts** | Growing 3-4 years → **3 pts** | Declining or flat → **1 pt** |
How to calculate:
- Cash = Cash and Cash Equivalents + Short-term Investments
- Total Debt = Short-term Debt + Long-term Debt
- D/E = Total Liabilities / Total Shareholders' Equity
- Retained Earnings: Compare year-over-year from balance sheets
Special note on D/E:
- Exclude operating lease liabilities from "debt" for this assessment (they are contractual obligations, not financial debt)
- If treasury stock is large, it reduces equity and inflates D/E — note this in analysis
### Category 3: Cash Flow Quality (25 points)
| # | Criterion | Excellent | Good | Weak |
|---|-----------|-----------|------|------|
| 8 | **CapEx / Net Income** | < 25% → **10 pts** | 25-50% → **6 pts** | > 50% → **2 pts** |
| 9 | **Operating CF > Net Income** | OCF/NI > 1.0 → **8 pts** | OCF/NI = 0.8-1.0 → **4 pts** | OCF/NI < 0.8 → **1 pt** |
| 10 | **Shareholder Returns** | Buybacks + Dividends → **7 pts** | Dividends only → **4 pts** | Neither → **1 pt** |
How to calculate:
- CapEx: Capital Expenditure from cash flow statement (use absolute value)
- Operating CF: Net Cash from Operating Activities
- Buybacks: Check if Treasury Stock increased year-over-year, or look at "repurchase of common stock" in financing activities
- Dividends: Look at "dividends paid" in financing activities
Note on CapEx:
- One-time large CapEx (e.g., new factory, data center buildout) should be noted but not penalized if the 5-year average CapEx/NI is still within range
- Asset-light businesses (software, services) naturally score well here
### Category 4: Consistency (25 points)
| # | Criterion | Excellent | Good | Weak |
|---|-----------|-----------|------|------|
| 11 | **Revenue Growth Streak** | 5+ consecutive years growing → **10 pts** | 3-4 years → **6 pts** | < 3 years → **2 pts** |
| 12 | **Net Income Growth Streak** | 5+ consecutive years growing → **10 pts** | 3-4 years → **6 pts** | < 3 years → **2 pts** |
| 13 | **Recession Resilience** | Profitable through last recession → **5 pts** | Revenue dip < 10% → **3 pts** | Significant losses → **1 pt** |
How to assess:
- Revenue/NI growth: Check year-over-year changes for the last 5 years
- Recession resilience: Check 2020 (COVID) and 2022 (rate hikes) performance. For older data, check 2008-2009 if available.
- A single flat year in an otherwise consistent growth streak can be scored as "Good"
## Score Interpretation
| Total Score | Rating | Interpretation |
|-------------|--------|----------------|
| 80-100 | **Excellent** | Strong durable competitive advantage. Consistent profitability, fortress balance sheet, capital-light operations. Classic Buffett-style investment candidate. |
| 60-79 | **Good** | Solid business with some competitive advantages. May have minor weaknesses in one category. Worth deeper investigation. |
| 40-59 | **Average** | Mediocre competitive position. Multiple areas of concern. Higher risk of margin erosion or competitive disruption. |
| < 40 | **Weak** | No clear competitive advantage. High debt, inconsistent earnings, or capital-intensive operations. Not a typical Buffett investment. |
## Sector-Specific Caveats
- **Financials**: Skip gross margin (criterion 1). Use net interest margin > 3% as substitute for 10 pts. D/E ratio thresholds don't apply — use Tier 1 Capital Ratio > 10% for 7 pts instead.
- **Utilities**: Naturally capital-intensive (CapEx criterion will score low). Offset by checking regulated return stability. If regulated ROE is consistently 9-11%, award 6 pts for criterion 8.
- **REITs**: Required to pay out 90%+ as dividends, so retained earnings won't grow. Skip criterion 7; award 5 pts if FFO per share grows consistently instead.
- **Growth-stage Tech**: May not yet have 5 years of profitability. Score consistency based on revenue growth and gross margin expansion trajectory. Note that the overall score may be artificially low.
## Output Format
Present the scorecard as a table:
| # | Criterion | Value | Score | Max |
|---|-----------|-------|-------|-----|
| 1 | Gross Margin | 43.2% | 10 | 10 |
| 2 | Net Margin | 25.1% | 10 | 10 |
| ... | ... | ... | ... | ... |
| | **Total** | | **XX** | **100** |
| | **Rating** | | **Excellent/Good/Average/Weak** | |

View file

@ -0,0 +1,70 @@
# Financial Ratios Benchmarks by Sector
Use the company's `sector` from `get_company_facts` to look up benchmark ranges below. Compare the company's ratios against these benchmarks and note deviations.
## Profitability Benchmarks
| Sector | Gross Margin | Operating Margin | Net Margin | ROE | ROA |
|--------|-------------|-----------------|------------|-----|-----|
| Communication Services | 50-60% | 15-25% | 10-18% | 12-20% | 5-10% |
| Consumer Discretionary | 35-50% | 8-15% | 5-10% | 15-25% | 5-10% |
| Consumer Staples | 35-45% | 12-18% | 8-12% | 20-30% | 8-12% |
| Energy | 30-50% | 10-20% | 5-15% | 10-20% | 5-10% |
| Financials | N/A | 25-35% | 15-25% | 10-15% | 1-2% |
| Health Care | 55-70% | 15-25% | 10-20% | 15-25% | 8-12% |
| Industrials | 25-35% | 10-15% | 6-10% | 15-20% | 5-8% |
| Information Technology | 55-70% | 20-30% | 15-25% | 20-35% | 10-15% |
| Materials | 25-35% | 10-18% | 5-12% | 10-18% | 5-8% |
| Real Estate | 55-70% | 25-40% | 15-30% | 5-10% | 2-5% |
| Utilities | 35-50% | 15-25% | 8-15% | 8-12% | 3-5% |
## Balance Sheet Benchmarks
| Sector | Current Ratio | Quick Ratio | D/E Ratio | Interest Coverage |
|--------|--------------|-------------|-----------|-------------------|
| Communication Services | 1.0-1.5 | 0.8-1.2 | 0.8-1.5 | 4-8x |
| Consumer Discretionary | 1.2-2.0 | 0.8-1.5 | 0.5-1.2 | 5-10x |
| Consumer Staples | 1.0-1.5 | 0.6-1.0 | 0.5-1.0 | 8-15x |
| Energy | 1.0-1.5 | 0.8-1.2 | 0.3-0.8 | 5-10x |
| Financials | N/A | N/A | 2.0-8.0 | N/A |
| Health Care | 1.5-2.5 | 1.2-2.0 | 0.3-0.8 | 8-15x |
| Industrials | 1.2-2.0 | 0.8-1.5 | 0.5-1.0 | 6-12x |
| Information Technology | 2.0-3.5 | 1.5-3.0 | 0.2-0.6 | 15-30x |
| Materials | 1.5-2.5 | 1.0-1.5 | 0.4-0.8 | 6-12x |
| Real Estate | 1.0-1.5 | 0.5-1.0 | 0.8-1.5 | 3-5x |
| Utilities | 0.8-1.2 | 0.5-0.8 | 1.0-2.0 | 3-5x |
## Cash Flow Benchmarks
| Sector | FCF Margin | CapEx/Revenue | Op. CF / Net Income |
|--------|-----------|---------------|---------------------|
| Communication Services | 10-20% | 10-20% | 1.2-1.8x |
| Consumer Discretionary | 5-12% | 3-8% | 1.1-1.5x |
| Consumer Staples | 8-15% | 3-6% | 1.2-1.5x |
| Energy | 5-15% | 15-30% | 1.5-2.5x |
| Financials | N/A | 1-3% | N/A |
| Health Care | 15-25% | 3-8% | 1.2-1.8x |
| Industrials | 5-12% | 3-8% | 1.2-1.6x |
| Information Technology | 20-35% | 3-10% | 1.2-1.8x |
| Materials | 5-12% | 5-12% | 1.3-2.0x |
| Real Estate | 15-30% | 5-15% | 1.5-3.0x |
| Utilities | 5-10% | 15-25% | 2.0-3.5x |
## Usage Notes
- **Financials sector**: Gross margin and current/quick ratios are not meaningful for banks and insurers. Use net interest margin and capital adequacy ratios instead.
- **Real Estate**: High depreciation makes net margin less useful. Focus on Funds From Operations (FFO).
- **Growth-stage companies**: May have negative margins. Compare against growth-stage peers rather than mature sector benchmarks.
- **Cyclical sectors** (Energy, Materials, Industrials): Use cycle-average margins (5-7 years) rather than single-year comparisons.
- **Post-M&A**: Goodwill and amortization may distort margins for 1-2 years after acquisitions. Note any large acquisitions.
## Buffett's Rules of Thumb (Quick Reference)
| Metric | Excellent | Good | Weak |
|--------|-----------|------|------|
| Gross Margin | > 40% | 30-40% | < 30% |
| Net Margin | > 20% | 10-20% | < 10% |
| ROE | > 15% | 10-15% | < 10% |
| D/E Ratio | < 0.5 | 0.5-0.8 | > 0.8 |
| CapEx / Net Income | < 25% | 25-50% | > 50% |
| Debt Payoff (years) | < 2 | 2-4 | > 4 |