Merge remote-tracking branch 'origin/main' into feat/dashboard
This commit is contained in:
commit
fd098c04eb
22 changed files with 1329 additions and 130 deletions
|
|
@ -10,6 +10,23 @@ import { isHeartbeatAckEvent } from "../hub/heartbeat-filter.js";
|
|||
|
||||
const devNull = { write: () => true } as unknown as NodeJS.WritableStream;
|
||||
|
||||
const WRITEINTERNAL_RETRY_DELAY_MS = 5000;
|
||||
|
||||
/** Lower-case substrings that mark a run error as transient (network / capacity related). */
const TRANSIENT_RUN_ERROR_MARKERS: readonly string[] = [
  "terminated",
  "aborted",
  "econnreset",
  "etimedout",
  "socket hang up",
  "fetch failed",
  "timeout",
  "timed out",
  "overloaded",
];

/** Stand-alone HTTP status codes (rate limit / gateway / unavailable) that are worth one retry. */
const TRANSIENT_HTTP_STATUS_PATTERN = /\b(429|502|503|504)\b/;

/**
 * Decide whether a runInternal error string describes a transient failure worth retrying.
 *
 * Matching is case-insensitive: the message is lower-cased once and then checked against
 * a fixed list of substrings and a word-bounded set of HTTP status codes.
 *
 * @param errorMsg - Error text reported by the internal run.
 * @returns `true` when the message contains any transient marker, otherwise `false`.
 */
function isTransientRunError(errorMsg: string): boolean {
  const normalized = errorMsg.toLowerCase();
  return (
    TRANSIENT_RUN_ERROR_MARKERS.some((marker) => normalized.includes(marker)) ||
    TRANSIENT_HTTP_STATUS_PATTERN.test(normalized)
  );
}
|
||||
|
||||
/** Discriminated union of legacy Message, raw AgentEvent, and MulticaEvent */
|
||||
export type ChannelItem = Message | AgentEvent | MulticaEvent;
|
||||
|
||||
|
|
@ -122,30 +139,54 @@ export class AsyncAgent {
|
|||
.then(async () => {
|
||||
if (this._closed) return;
|
||||
const prevForward = this.forwardInternalAssistant;
|
||||
this.forwardInternalAssistant = forwardAssistant;
|
||||
try {
|
||||
const result = await this.agent.runInternal(content);
|
||||
await this.agent.flushSession();
|
||||
if (result.error) {
|
||||
// Internal run errors are for diagnostics only; do not leak to user stream.
|
||||
console.error(`[AsyncAgent] Internal run error: ${result.error}`);
|
||||
}
|
||||
// Stop forwarding BEFORE persist to avoid double-emitting the same
|
||||
// assistant message (once from runInternal streaming, once from appendMessage).
|
||||
this.forwardInternalAssistant = prevForward;
|
||||
// Persist the LLM summary so it remains in parent context for future turns
|
||||
if (persistResponse && result.text?.trim() && !isSilentReplyText(result.text)) {
|
||||
this.agent.persistAssistantSummary(result.text.trim());
|
||||
|
||||
for (let attempt = 1; attempt <= 2; attempt++) {
|
||||
this.forwardInternalAssistant = forwardAssistant;
|
||||
try {
|
||||
const result = await this.agent.runInternal(content);
|
||||
await this.agent.flushSession();
|
||||
|
||||
if (result.error) {
|
||||
if (attempt === 1 && isTransientRunError(result.error)) {
|
||||
console.warn(
|
||||
`[AsyncAgent] Internal run transient error: ${result.error}. Retrying in ${WRITEINTERNAL_RETRY_DELAY_MS}ms...`,
|
||||
);
|
||||
this.forwardInternalAssistant = prevForward;
|
||||
await new Promise((r) => setTimeout(r, WRITEINTERNAL_RETRY_DELAY_MS));
|
||||
continue;
|
||||
}
|
||||
// Final attempt or non-transient: log and give up
|
||||
console.error(`[AsyncAgent] Internal run error: ${result.error}`);
|
||||
this.forwardInternalAssistant = prevForward;
|
||||
return;
|
||||
}
|
||||
|
||||
// Success — stop forwarding BEFORE persist to avoid double-emitting
|
||||
this.forwardInternalAssistant = prevForward;
|
||||
if (persistResponse && result.text?.trim() && !isSilentReplyText(result.text)) {
|
||||
this.agent.persistAssistantSummary(result.text.trim());
|
||||
await this.agent.flushSession();
|
||||
}
|
||||
return;
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
if (attempt === 1 && isTransientRunError(message)) {
|
||||
console.warn(
|
||||
`[AsyncAgent] Internal run exception: ${message}. Retrying in ${WRITEINTERNAL_RETRY_DELAY_MS}ms...`,
|
||||
);
|
||||
this.forwardInternalAssistant = prevForward;
|
||||
await new Promise((r) => setTimeout(r, WRITEINTERNAL_RETRY_DELAY_MS));
|
||||
continue;
|
||||
}
|
||||
console.error(`[AsyncAgent] Internal run failed: ${message}`);
|
||||
this.forwardInternalAssistant = prevForward;
|
||||
return;
|
||||
}
|
||||
} finally {
|
||||
this.forwardInternalAssistant = prevForward;
|
||||
}
|
||||
})
|
||||
.catch((err) => {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
// Internal run exceptions are for diagnostics only; do not leak to user stream.
|
||||
console.error(`[AsyncAgent] Internal run failed: ${message}`);
|
||||
console.error(`[AsyncAgent] Internal run failed (outer): ${message}`);
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ vi.mock("@mariozechner/pi-coding-agent", () => ({
|
|||
describe("token-estimation", () => {
|
||||
describe("constants", () => {
|
||||
it("should have correct safety margin", () => {
|
||||
expect(ESTIMATION_SAFETY_MARGIN).toBe(1.2);
|
||||
expect(ESTIMATION_SAFETY_MARGIN).toBe(1.5);
|
||||
});
|
||||
|
||||
it("should have correct compaction trigger ratio", () => {
|
||||
|
|
@ -63,20 +63,20 @@ describe("token-estimation", () => {
|
|||
});
|
||||
|
||||
it("should estimate tokens based on character count", () => {
|
||||
// ~3 chars per token
|
||||
expect(estimateSystemPromptTokens("abc")).toBe(1);
|
||||
expect(estimateSystemPromptTokens("abcdef")).toBe(2);
|
||||
expect(estimateSystemPromptTokens("abcdefghi")).toBe(3);
|
||||
// ~2 chars per token (conservative for CJK/mixed content)
|
||||
expect(estimateSystemPromptTokens("ab")).toBe(1);
|
||||
expect(estimateSystemPromptTokens("abcd")).toBe(2);
|
||||
expect(estimateSystemPromptTokens("abcdef")).toBe(3);
|
||||
});
|
||||
|
||||
it("should ceil the result", () => {
|
||||
// 4 chars / 3 = 1.33, should ceil to 2
|
||||
expect(estimateSystemPromptTokens("abcd")).toBe(2);
|
||||
// 3 chars / 2 = 1.5, should ceil to 2
|
||||
expect(estimateSystemPromptTokens("abc")).toBe(2);
|
||||
});
|
||||
|
||||
it("should handle long prompts", () => {
|
||||
const longPrompt = "a".repeat(3000);
|
||||
expect(estimateSystemPromptTokens(longPrompt)).toBe(1000);
|
||||
expect(estimateSystemPromptTokens(longPrompt)).toBe(1500);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -140,7 +140,7 @@ describe("token-estimation", () => {
|
|||
reserveTokens: 0,
|
||||
});
|
||||
|
||||
// Utilization = (tokens * 1.2) / available
|
||||
// Utilization = (tokens * 1.5) / available
|
||||
expect(result.utilizationRatio).toBeGreaterThan(0);
|
||||
});
|
||||
});
|
||||
|
|
@ -292,26 +292,26 @@ describe("token-estimation", () => {
|
|||
content: "x".repeat(400), // ~100 tokens
|
||||
} as AgentMessage;
|
||||
|
||||
// With safety margin 1.2, 100 * 1.2 = 120 tokens
|
||||
// 120 > 1000 * 0.1 = 100, so oversized
|
||||
// With safety margin 1.5, 100 * 1.5 = 150 tokens
|
||||
// 150 > 1000 * 0.1 = 100, so oversized
|
||||
expect(isMessageOversized(message, 1000, 0.1)).toBe(true);
|
||||
|
||||
// 120 < 1000 * 0.2 = 200, so not oversized
|
||||
// 150 < 1000 * 0.2 = 200, so not oversized
|
||||
expect(isMessageOversized(message, 1000, 0.2)).toBe(false);
|
||||
});
|
||||
|
||||
it("should apply safety margin to token count", () => {
|
||||
const message = {
|
||||
role: "user",
|
||||
content: "x".repeat(400), // ~100 tokens, with margin ~120
|
||||
content: "x".repeat(400), // ~100 tokens, with margin ~150
|
||||
} as AgentMessage;
|
||||
|
||||
// Without margin: 100 < 250 (50% of 500)
|
||||
// With margin: 120 < 250, still ok
|
||||
// With margin: 150 < 250, still ok
|
||||
expect(isMessageOversized(message, 500, 0.5)).toBe(false);
|
||||
|
||||
// Without margin: 100 < 100 would be false
|
||||
// With margin: 120 > 100, should be true
|
||||
// With margin: 150 > 100, should be true
|
||||
expect(isMessageOversized(message, 200, 0.5)).toBe(true);
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ import { estimateTokens } from "@mariozechner/pi-coding-agent";
|
|||
import type { TokenEstimation, TokenAwareCompactionResult } from "./types.js";
|
||||
|
||||
/** Safety margin coefficient to compensate for estimation inaccuracy */
|
||||
export const ESTIMATION_SAFETY_MARGIN = 1.2; // 20% buffer
|
||||
export const ESTIMATION_SAFETY_MARGIN = 1.5; // 50% buffer (covers CJK and mixed content)
|
||||
|
||||
/** Utilization threshold for triggering compaction */
|
||||
export const COMPACTION_TRIGGER_RATIO = 0.8; // 80%
|
||||
|
|
@ -32,10 +32,10 @@ export function estimateMessagesTokens(messages: AgentMessage[]): number {
|
|||
*/
|
||||
export function estimateSystemPromptTokens(systemPrompt: string | undefined): number {
  // Missing or empty prompt contributes no tokens.
  if (!systemPrompt) return 0;

  // Conservative estimation: ~2 chars = 1 token.
  // English/code averages ~4 chars/token but CJK averages ~1-2 chars/token.
  // Using /2 as a safe default to prevent underestimation on mixed content.
  // (The previous "/ 3" average was left in place ahead of this return and made
  // the conservative estimate unreachable; only the /2 estimate remains.)
  const charsPerToken = 2;
  return Math.ceil(systemPrompt.length / charsPerToken);
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
21
packages/core/src/agent/errors.ts
Normal file
21
packages/core/src/agent/errors.ts
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
/**
|
||||
* Error classification utilities for agent error handling.
|
||||
*/
|
||||
|
||||
/**
 * Check if an error is a context overflow / "prompt too long" error from any LLM provider.
 *
 * These errors indicate the request exceeded the model's context window and should
 * trigger auto-compaction rather than auth profile rotation.
 *
 * The message is taken from `Error.message`, from a string `message` property on a
 * plain object, or from `String(error)` as a last resort, and is matched
 * case-insensitively against known provider phrasings.
 *
 * @param error - Any thrown value (Error instance, plain object, string, ...).
 * @returns `true` when the message matches a known context-overflow pattern.
 */
export function isContextOverflowError(error: unknown): boolean {
  let raw: string;
  if (error instanceof Error) {
    raw = error.message;
  } else if (typeof error === "object" && error !== null) {
    // Plain error-like objects would otherwise stringify to "[object Object]"
    // and could never match any pattern below.
    const candidate = (error as { message?: unknown }).message;
    raw = typeof candidate === "string" ? candidate : String(error);
  } else {
    raw = String(error);
  }
  const msg = raw.toLowerCase();

  return (
    msg.includes("prompt is too long") ||
    msg.includes("context length exceeded") ||
    msg.includes("maximum context length") ||
    msg.includes("request_too_large") ||
    msg.includes("request size exceeds") ||
    // Require 413 as a stand-alone number so values such as "41300" do not count
    // as an HTTP 413 status.
    (/\b413\b/.test(msg) && msg.includes("too large"))
  );
}
|
||||
|
|
@ -22,7 +22,14 @@ import {
|
|||
checkContextWindow,
|
||||
DEFAULT_CONTEXT_TOKENS,
|
||||
type ContextWindowGuardResult,
|
||||
estimateTokenUsage,
|
||||
COMPACTION_TRIGGER_RATIO,
|
||||
compactMessagesTokenAware,
|
||||
MIN_KEEP_MESSAGES,
|
||||
} from "./context-window/index.js";
|
||||
import {
|
||||
pruneToolResults,
|
||||
} from "./context-window/tool-result-pruning.js";
|
||||
import { mergeToolsConfig, type ToolsConfig } from "./tools/policy.js";
|
||||
import {
|
||||
loadAuthProfileStore,
|
||||
|
|
@ -42,6 +49,7 @@ import {
|
|||
sanitizeToolCallInputs,
|
||||
sanitizeToolUseResultPairing,
|
||||
} from "./session/session-transcript-repair.js";
|
||||
import { isContextOverflowError } from "./errors.js";
|
||||
|
||||
// ============================================================
|
||||
// Error classification for auth profile rotation
|
||||
|
|
@ -89,11 +97,15 @@ export class Agent {
|
|||
private readonly stderr: NodeJS.WritableStream;
|
||||
private initialized = false;
|
||||
|
||||
// Context window settings (for pre-flight compaction)
|
||||
private readonly reserveTokens: number;
|
||||
|
||||
// Internal run state
|
||||
private _internalRun = false;
|
||||
private _isRunning = false;
|
||||
private _aborted = false;
|
||||
private _runMutex: Promise<void> = Promise.resolve();
|
||||
private _compactionPromise: Promise<void> = Promise.resolve();
|
||||
private currentUserDisplayPrompt: string | undefined;
|
||||
|
||||
// MulticaEvent subscribers (parallel to PiAgentCore's subscriber list)
|
||||
|
|
@ -188,8 +200,10 @@ export class Agent {
|
|||
return this.currentApiKey;
|
||||
},
|
||||
transformContext: async (messages) => {
|
||||
const sanitizedInputs = sanitizeToolCallInputs(messages);
|
||||
return sanitizeToolUseResultPairing(sanitizedInputs);
|
||||
let result = sanitizeToolCallInputs(messages);
|
||||
result = sanitizeToolUseResultPairing(result);
|
||||
result = this.preflightCompact(result);
|
||||
return result;
|
||||
},
|
||||
});
|
||||
|
||||
|
|
@ -260,6 +274,9 @@ export class Agent {
|
|||
? resolveApiKey(this.resolvedProvider, options.apiKey)
|
||||
: undefined;
|
||||
|
||||
// Store reserveTokens for pre-flight compaction
|
||||
this.reserveTokens = options.reserveTokens ?? 1024;
|
||||
|
||||
// 创建 SessionManager(带 context window 配置)
|
||||
this.session = new SessionManager({
|
||||
sessionId: this.sessionId,
|
||||
|
|
@ -425,6 +442,8 @@ export class Agent {
|
|||
prompt: string,
|
||||
options?: { displayPrompt?: string },
|
||||
): Promise<AgentRunResult> {
|
||||
// Wait for any in-flight compaction from the previous run
|
||||
await this._compactionPromise;
|
||||
await this.ensureInitialized();
|
||||
this.refreshAuthState();
|
||||
this.output.state.lastAssistantText = "";
|
||||
|
|
@ -444,6 +463,9 @@ export class Agent {
|
|||
const canRotate = !this.pinnedProfile && this.profileCandidates.length > 1;
|
||||
let lastError: unknown;
|
||||
|
||||
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 2;
|
||||
let overflowAttempts = 0;
|
||||
|
||||
// Loop to exhaust all candidate profiles on rotatable errors
|
||||
while (true) {
|
||||
try {
|
||||
|
|
@ -452,6 +474,34 @@ export class Agent {
|
|||
} catch (error) {
|
||||
lastError = error;
|
||||
|
||||
// Context overflow recovery: auto-compact and retry before trying auth rotation
|
||||
if (isContextOverflowError(error) && overflowAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS) {
|
||||
overflowAttempts++;
|
||||
this.stderr.write(
|
||||
`[context-overflow] Overflow detected (attempt ${overflowAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}), compacting...\n`,
|
||||
);
|
||||
const messages = this.agent.state.messages.slice();
|
||||
const result = await this.session.maybeCompact(messages);
|
||||
if (result?.kept) {
|
||||
this.agent.replaceMessages(result.kept);
|
||||
this.output.state.lastAssistantText = "";
|
||||
continue; // retry with compacted messages
|
||||
}
|
||||
// Forced fallback: estimation may diverge from reality (the LLM
|
||||
// already told us the context is too large), so drop the oldest
|
||||
// half of messages even when maybeCompact thinks no compaction is needed.
|
||||
if (messages.length > MIN_KEEP_MESSAGES) {
|
||||
const keepCount = Math.max(MIN_KEEP_MESSAGES, Math.floor(messages.length / 2));
|
||||
const forcedKept = messages.slice(-keepCount);
|
||||
this.stderr.write(
|
||||
`[context-overflow] Forced compaction: ${messages.length} → ${forcedKept.length} messages\n`,
|
||||
);
|
||||
this.agent.replaceMessages(forcedKept);
|
||||
this.output.state.lastAssistantText = "";
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
const reason = classifyError(error);
|
||||
if (this.currentProfileId && isRotatableError(reason)) {
|
||||
markAuthProfileFailure(this.currentProfileId, reason);
|
||||
|
|
@ -615,35 +665,88 @@ export class Agent {
|
|||
// Skip compaction during internal runs — internal messages will be
|
||||
// rolled back from memory afterwards, so compacting now would be incorrect.
|
||||
if (message.role === "assistant" && !this._internalRun) {
|
||||
void this.maybeCompact();
|
||||
this._compactionPromise = this.maybeCompact().catch((err) => {
|
||||
console.error("[Agent] Compaction failed:", err);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Pre-flight context compaction, invoked from transformContext before an LLM call.
 *
 * Works on the message array passed in and returns the array to send. This method
 * itself only calls the estimation / pruning / compaction helpers and writes one
 * diagnostic line to stderr when messages were dropped.
 *
 * Steps:
 *  1. Estimate utilization; below COMPACTION_TRIGGER_RATIO the input is returned as-is.
 *  2. Prune tool results.
 *  3. Re-estimate; if still at or above the threshold, drop messages via
 *     compactMessagesTokenAware.
 *
 * @param messages - Messages about to be sent to the model.
 * @returns The (possibly pruned and shortened) message list.
 */
private preflightCompact(messages: AgentMessage[]): AgentMessage[] {
  // Single place that builds the estimation request for a given candidate list.
  const measure = (candidate: AgentMessage[]) =>
    estimateTokenUsage({
      messages: candidate,
      systemPrompt: this.agent.state.systemPrompt,
      contextWindowTokens: this.contextWindowGuard.tokens,
      reserveTokens: this.reserveTokens,
    });

  // Fast path: enough headroom, nothing to do.
  if (measure(messages).utilizationRatio < COMPACTION_TRIGGER_RATIO) {
    return messages;
  }

  const originalCount = messages.length;

  // Phase 1: prune tool results (soft trim + hard clear).
  const pruneOutcome = pruneToolResults({
    messages,
    contextWindowTokens: this.contextWindowGuard.tokens,
  });
  let current = pruneOutcome.changed ? pruneOutcome.messages : messages;

  // Phase 2: re-estimate after pruning and drop oldest messages if still over threshold.
  const postPrune = measure(current);
  if (postPrune.utilizationRatio >= COMPACTION_TRIGGER_RATIO) {
    const compacted = compactMessagesTokenAware(current, postPrune.availableTokens);
    if (compacted) {
      current = compacted.kept;
    }
  }

  // Report only when the message count actually shrank.
  const droppedCount = originalCount - current.length;
  if (droppedCount > 0) {
    this.stderr.write(
      `[pre-flight compaction] pruned ${droppedCount} messages (${originalCount} → ${current.length})\n`,
    );
  }

  return current;
}
|
||||
|
||||
/**
 * Compact the in-memory conversation when the session reports that it is needed.
 *
 * Takes a snapshot of the current messages, asks the session whether compaction is
 * required, and if the session returns a result, swaps the agent's messages for the
 * kept subset and emits `compaction_start` / `compaction_end` events.
 *
 * Errors from the session are not caught here; they reject the returned promise so
 * the caller decides how to report them.
 *
 * @returns A promise that resolves once compaction (if any) has been applied.
 */
private async maybeCompact(): Promise<void> {
  const messages = this.agent.state.messages.slice();
  if (!this.session.needsCompaction(messages)) return;

  const result = await this.session.maybeCompact(messages);
  if (!result) return;

  this.emitMulticaEvent({ type: "compaction_start" });
  if (result.kept) {
    this.agent.replaceMessages(result.kept);
  }
  const endEvent: CompactionEndEvent = {
    type: "compaction_end",
    removed: result.removedCount ?? 0,
    // Guard the access the same way as the replaceMessages call above: fall back to
    // the snapshot size when the result carries no kept list.
    kept: result.kept?.length ?? messages.length,
    tokensRemoved: result.tokensRemoved,
    tokensKept: result.tokensKept,
    reason: result.reason ?? "tokens",
  };
  this.emitMulticaEvent(endEvent);
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ vi.mock("../context-window/index.js", async () => {
|
|||
const systemPromptTokens = params.systemPrompt ? 100 : 0;
|
||||
const reserve = params.reserveTokens ?? 1024;
|
||||
const availableTokens = Math.max(0, params.contextWindowTokens - systemPromptTokens - reserve);
|
||||
const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.2) / availableTokens : 1;
|
||||
const utilizationRatio = availableTokens > 0 ? (messageTokens * 1.5) / availableTokens : 1;
|
||||
|
||||
return {
|
||||
messageTokens,
|
||||
|
|
@ -234,7 +234,7 @@ describe("compaction", () => {
|
|||
// 100 * 10 = 1000 message tokens
|
||||
// System: 100 tokens, Reserve: 1024
|
||||
// Available: 2000 - 100 - 1024 = 876
|
||||
// Utilization: (1000 * 1.2) / 876 = 1.37 > 0.8
|
||||
// Utilization: (1000 * 1.5) / 876 = 1.71 > 0.8
|
||||
const result = compactMessages(messages, {
|
||||
mode: "tokens",
|
||||
contextWindowTokens: 2000,
|
||||
|
|
@ -249,7 +249,7 @@ describe("compaction", () => {
|
|||
const messages = createMessages(5);
|
||||
// 5 * 10 = 50 message tokens
|
||||
// Available: 10000 - 100 - 1024 = 8876
|
||||
// Utilization: (50 * 1.2) / 8876 = 0.007 < 0.8
|
||||
// Utilization: (50 * 1.5) / 8876 = 0.008 < 0.8
|
||||
const result = compactMessages(messages, {
|
||||
mode: "tokens",
|
||||
contextWindowTokens: 10000,
|
||||
|
|
|
|||
|
|
@ -188,7 +188,7 @@ describe("formatCoalescedAnnouncementMessage", () => {
|
|||
|
||||
const msg = formatCoalescedAnnouncementMessage(records);
|
||||
|
||||
expect(msg).toContain("All 2 background tasks have completed");
|
||||
expect(msg).toContain("All 2 background task(s) have completed");
|
||||
expect(msg).toContain('Task 1: "Task A"');
|
||||
expect(msg).toContain("Found issue A");
|
||||
expect(msg).toContain('Task 2: "Task B"');
|
||||
|
|
@ -251,4 +251,44 @@ describe("formatCoalescedAnnouncementMessage", () => {
|
|||
expect(msg).toContain("上海:多云,9°C");
|
||||
expect(msg).toContain("MUST include findings from every task item above");
|
||||
});
|
||||
|
||||
it("includes continuation prompt when next is provided", () => {
|
||||
const records = [
|
||||
makeRecord({ runId: "run-1", label: "AAPL data", findings: "AAPL revenue: $100B" }),
|
||||
makeRecord({ runId: "run-2", label: "MSFT data", findings: "MSFT revenue: $200B" }),
|
||||
];
|
||||
|
||||
const msg = formatCoalescedAnnouncementMessage(records, "Summarize all data and write a PDF investment report");
|
||||
|
||||
expect(msg).toContain("CONTINUATION TASK");
|
||||
expect(msg).toContain("Summarize all data and write a PDF investment report");
|
||||
expect(msg).toContain("AAPL revenue: $100B");
|
||||
expect(msg).toContain("MSFT revenue: $200B");
|
||||
// Should NOT contain the default summarize instruction
|
||||
expect(msg).not.toContain("Summarize these results naturally for the user");
|
||||
});
|
||||
|
||||
it("uses continuation prompt even for single record when next is provided", () => {
|
||||
const records = [
|
||||
makeRecord({ runId: "run-1", label: "Data collection", findings: "All data collected" }),
|
||||
];
|
||||
|
||||
const msg = formatCoalescedAnnouncementMessage(records, "Generate the final report");
|
||||
|
||||
expect(msg).toContain("CONTINUATION TASK");
|
||||
expect(msg).toContain("Generate the final report");
|
||||
expect(msg).toContain("All data collected");
|
||||
});
|
||||
|
||||
it("uses default summarize instruction when next is not provided", () => {
|
||||
const records = [
|
||||
makeRecord({ runId: "run-1" }),
|
||||
makeRecord({ runId: "run-2" }),
|
||||
];
|
||||
|
||||
const msg = formatCoalescedAnnouncementMessage(records);
|
||||
|
||||
expect(msg).not.toContain("CONTINUATION TASK");
|
||||
expect(msg).toContain("Summarize these results naturally for the user");
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -193,12 +193,17 @@ export function formatAnnouncementMessage(params: FormatAnnouncementParams): str
|
|||
/**
|
||||
* Format a coalesced announcement message from multiple completed subagent runs.
|
||||
* When only one record is provided, delegates to formatAnnouncementMessage.
|
||||
*
|
||||
* @param next — Optional continuation prompt from a SubagentGroup. When present,
|
||||
* the parent agent is instructed to execute the continuation using the combined
|
||||
* findings, rather than just summarizing.
|
||||
*/
|
||||
export function formatCoalescedAnnouncementMessage(
|
||||
records: SubagentRunRecord[],
|
||||
next?: string,
|
||||
): string {
|
||||
// Single record: delegate to existing format for backward-compatible behavior
|
||||
if (records.length === 1) {
|
||||
// Single record without continuation: delegate to existing format
|
||||
if (records.length === 1 && !next) {
|
||||
const r = records[0]!;
|
||||
return formatAnnouncementMessage({
|
||||
runId: r.runId,
|
||||
|
|
@ -214,10 +219,9 @@ export function formatCoalescedAnnouncementMessage(
|
|||
});
|
||||
}
|
||||
|
||||
// Multiple records: build combined message.
|
||||
// Include a strict raw-findings section so parent can reliably cover every task result.
|
||||
// Multiple records (or single with continuation): build combined message.
|
||||
const parts: string[] = [
|
||||
`All ${records.length} background tasks have completed. Here are the combined results:`,
|
||||
`All ${records.length} background task(s) have completed. Here are the combined results:`,
|
||||
"",
|
||||
];
|
||||
|
||||
|
|
@ -262,14 +266,30 @@ export function formatCoalescedAnnouncementMessage(
|
|||
);
|
||||
}
|
||||
|
||||
parts.push(
|
||||
"",
|
||||
"Summarize these results naturally for the user.",
|
||||
"You MUST include findings from every task item above, without omission.",
|
||||
"Keep it concise, but preserve concrete findings from each task.",
|
||||
"Do not mention technical details like session IDs or that these were background tasks.",
|
||||
"You can respond with NO_REPLY if no announcement is needed.",
|
||||
);
|
||||
// Continuation vs. summarization
|
||||
if (next) {
|
||||
parts.push(
|
||||
"",
|
||||
"---",
|
||||
"",
|
||||
"CONTINUATION TASK: The user's original request requires further work using the findings above.",
|
||||
"Execute the following task now, using ALL the collected data:",
|
||||
"",
|
||||
next,
|
||||
"",
|
||||
"Use the raw findings above as your data source. Call tools as needed to complete this task.",
|
||||
"Do not mention technical details like session IDs or that these were background tasks.",
|
||||
);
|
||||
} else {
|
||||
parts.push(
|
||||
"",
|
||||
"Summarize these results naturally for the user.",
|
||||
"You MUST include findings from every task item above, without omission.",
|
||||
"Keep it concise, but preserve concrete findings from each task.",
|
||||
"Do not mention technical details like session IDs or that these were background tasks.",
|
||||
"You can respond with NO_REPLY if no announcement is needed.",
|
||||
);
|
||||
}
|
||||
|
||||
return parts.join("\n");
|
||||
}
|
||||
|
|
@ -289,8 +309,9 @@ export function formatCoalescedAnnouncementMessage(
|
|||
export function runCoalescedAnnounceFlow(
|
||||
requesterSessionId: string,
|
||||
records: SubagentRunRecord[],
|
||||
next?: string,
|
||||
): boolean {
|
||||
const message = formatCoalescedAnnouncementMessage(records);
|
||||
const message = formatCoalescedAnnouncementMessage(records, next);
|
||||
|
||||
try {
|
||||
const hub = getHub();
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ const rmSyncMock = vi.fn();
|
|||
|
||||
vi.mock("./registry-store.js", () => ({
|
||||
loadSubagentRuns: loadSubagentRunsMock,
|
||||
loadSubagentGroups: vi.fn(() => new Map()),
|
||||
saveSubagentRuns: saveSubagentRunsMock,
|
||||
}));
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
||||
import { join } from "node:path";
|
||||
import { DATA_DIR } from "@multica/utils";
|
||||
import type { SubagentRunRecord } from "./types.js";
|
||||
import type { SubagentRunRecord, SubagentGroup } from "./types.js";
|
||||
|
||||
const SUBAGENTS_DIR = join(DATA_DIR, "subagents");
|
||||
const RUNS_FILE = join(SUBAGENTS_DIR, "runs.json");
|
||||
|
|
@ -15,6 +15,7 @@ const RUNS_FILE = join(SUBAGENTS_DIR, "runs.json");
|
|||
interface SubagentRunsStore {
  // Schema version; the loaders in this file only accept the value 1.
  version: 1;
  // Persisted run records, keyed by the same keys as the in-memory runs map.
  runs: Record<string, SubagentRunRecord>;
  // Persisted groups; left undefined when there are no groups to store.
  groups?: Record<string, SubagentGroup> | undefined;
}
|
||||
|
||||
function ensureDir(): void {
|
||||
|
|
@ -48,13 +49,31 @@ export function loadSubagentRuns(): Map<string, SubagentRunRecord> {
|
|||
}
|
||||
}
|
||||
|
||||
/** Save all subagent runs to disk */
|
||||
export function saveSubagentRuns(runs: Map<string, SubagentRunRecord>): void {
|
||||
/**
 * Load all persisted subagent groups.
 *
 * Reads RUNS_FILE and returns its `groups` section as a Map. Returns an empty Map when
 * the file does not exist, cannot be read or parsed, has a version other than 1, or
 * does not contain a plain-object `groups` section.
 *
 * @returns Map of group ID to persisted group record (possibly empty).
 */
export function loadSubagentGroups(): Map<string, SubagentGroup> {
  if (!existsSync(RUNS_FILE)) return new Map();

  try {
    const parsed: unknown = JSON.parse(readFileSync(RUNS_FILE, "utf-8"));
    // JSON.parse can yield any JSON value; only a non-null object can be a store.
    if (typeof parsed !== "object" || parsed === null) return new Map();

    const { version, groups } = parsed as Partial<SubagentRunsStore>;
    if (version !== 1) return new Map();
    // Reject arrays and primitives: Object.entries would otherwise produce
    // index-keyed or per-character entries that are not group records.
    if (typeof groups !== "object" || groups === null || Array.isArray(groups)) {
      return new Map();
    }

    return new Map(Object.entries(groups));
  } catch {
    // Unreadable or malformed file: treat as "no groups persisted".
    return new Map();
  }
}
|
||||
|
||||
/** Save all subagent runs and groups to disk */
|
||||
export function saveSubagentRuns(
|
||||
runs: Map<string, SubagentRunRecord>,
|
||||
groups?: Map<string, SubagentGroup>,
|
||||
): void {
|
||||
ensureDir();
|
||||
|
||||
const store: SubagentRunsStore = {
|
||||
version: 1,
|
||||
runs: Object.fromEntries(runs),
|
||||
groups: groups && groups.size > 0 ? Object.fromEntries(groups) : undefined,
|
||||
};
|
||||
|
||||
writeFileSync(RUNS_FILE, JSON.stringify(store, null, 2), "utf-8");
|
||||
|
|
|
|||
|
|
@ -6,11 +6,12 @@
|
|||
*/
|
||||
|
||||
import { getHub, isHubInitialized } from "../../hub/hub-singleton.js";
|
||||
import { loadSubagentRuns, saveSubagentRuns } from "./registry-store.js";
|
||||
import { loadSubagentRuns, saveSubagentRuns, loadSubagentGroups } from "./registry-store.js";
|
||||
import { readLatestAssistantReply, runCoalescedAnnounceFlow } from "./announce.js";
|
||||
import type {
|
||||
RegisterSubagentRunParams,
|
||||
SubagentRunRecord,
|
||||
SubagentGroup,
|
||||
} from "./types.js";
|
||||
import { resolveSessionDir } from "../session/storage.js";
|
||||
import { rmSync } from "node:fs";
|
||||
|
|
@ -28,6 +29,7 @@ const SWEEP_INTERVAL_MS = 60 * 1000;
|
|||
// ============================================================================
|
||||
|
||||
const subagentRuns = new Map<string, SubagentRunRecord>();
|
||||
const subagentGroups = new Map<string, SubagentGroup>();
|
||||
let sweepTimer: ReturnType<typeof setInterval> | undefined;
|
||||
const resumedRequesters = new Set<string>();
|
||||
|
||||
|
|
@ -50,6 +52,12 @@ export function initSubagentRegistry(): void {
|
|||
}
|
||||
}
|
||||
|
||||
// Restore groups
|
||||
const persistedGroups = loadSubagentGroups();
|
||||
for (const [groupId, group] of persistedGroups) {
|
||||
subagentGroups.set(groupId, group);
|
||||
}
|
||||
|
||||
// Process incomplete runs
|
||||
const affectedRequesters = new Set<string>();
|
||||
|
||||
|
|
@ -91,6 +99,45 @@ export function initSubagentRegistry(): void {
|
|||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Group management
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Create a new subagent group, register it in the in-memory group map, and persist.
 *
 * @param params - Group ID, requester session ID, and optional label / continuation prompt.
 * @returns The newly created group record (stamped with the current time).
 */
export function createSubagentGroup(params: {
  groupId: string;
  requesterSessionId: string;
  label?: string;
  next?: string;
}): SubagentGroup {
  const { groupId, requesterSessionId, label, next } = params;

  const group: SubagentGroup = {
    groupId,
    requesterSessionId,
    label,
    next,
    createdAt: Date.now(),
  };

  subagentGroups.set(groupId, group);
  persist();
  return group;
}
|
||||
|
||||
/**
 * Look up a subagent group by its ID.
 *
 * @param groupId - ID of the group to find.
 * @returns The group record, or `undefined` when no group with that ID is registered.
 */
export function getSubagentGroup(groupId: string): SubagentGroup | undefined {
  const group = subagentGroups.get(groupId);
  return group;
}
|
||||
|
||||
/**
 * Collect every tracked run whose `groupId` matches the given group.
 *
 * @param groupId - ID of the group whose runs are wanted.
 * @returns Matching run records in registry iteration order (empty when none match).
 */
export function listGroupRuns(groupId: string): SubagentRunRecord[] {
  return Array.from(subagentRuns.values()).filter((run) => run.groupId === groupId);
}
|
||||
|
||||
/** Register a new subagent run and start tracking its lifecycle. */
|
||||
export function registerSubagentRun(params: RegisterSubagentRunParams): SubagentRunRecord {
|
||||
const {
|
||||
|
|
@ -102,6 +149,7 @@ export function registerSubagentRun(params: RegisterSubagentRunParams): Subagent
|
|||
cleanup = "delete",
|
||||
timeoutSeconds,
|
||||
announce,
|
||||
groupId,
|
||||
start,
|
||||
} = params;
|
||||
|
||||
|
|
@ -113,6 +161,7 @@ export function registerSubagentRun(params: RegisterSubagentRunParams): Subagent
|
|||
label,
|
||||
cleanup,
|
||||
announce,
|
||||
groupId,
|
||||
createdAt: Date.now(),
|
||||
};
|
||||
|
||||
|
|
@ -190,6 +239,7 @@ export function shutdownSubagentRegistry(): void {
|
|||
/**
 * Test helper: wipe the registry's in-memory state.
 *
 * Clears the run map, the group map and the resumed-requesters collection,
 * then stops the sweeper.
 */
export function resetSubagentRegistryForTests(): void {
  for (const store of [subagentRuns, subagentGroups, resumedRequesters]) {
    store.clear();
  }
  stopSweeper();
}
|
||||
|
|
@ -300,37 +350,59 @@ function captureFindings(record: SubagentRunRecord): void {
|
|||
/**
 * Phase 2: Announce completed-but-unannounced runs for one requester.
 *
 * A run is "ready" once it has ended (`endedAt` is set) and its findings
 * have been captured. Three announcement paths:
 * 1. Grouped runs — wait until every unannounced run in the group is ready,
 *    then announce them together with the group's `next` continuation
 *    prompt (if any).
 * 2. Ungrouped silent runs — legacy behavior: wait until ALL unannounced
 *    silent runs from the same requester are ready, then announce together.
 * 3. Ungrouped immediate runs — announce per-completion (default).
 *
 * @param requesterSessionId Session ID of the parent agent whose runs are checked.
 */
function checkAndAnnounce(requesterSessionId: string): void {
  const allRuns = listSubagentRuns(requesterSessionId);
  const isReady = (r: SubagentRunRecord): boolean =>
    r.endedAt !== undefined && Boolean(r.findingsCaptured);

  // ── 1. Grouped runs: announce by group when all members complete ──
  // Only groups that still have at least one unannounced run are considered.
  const groupIds = new Set<string>();
  for (const r of allRuns) {
    if (r.groupId && !r.announced) groupIds.add(r.groupId);
  }

  for (const groupId of groupIds) {
    const unannounced = allRuns.filter(r => r.groupId === groupId && !r.announced);
    const ready = unannounced.filter(isReady);

    if (ready.length > 0 && ready.length === unannounced.length) {
      // The group record may be missing; the runs are then announced
      // without a continuation prompt.
      const group = subagentGroups.get(groupId);
      announceRuns(requesterSessionId, ready, group?.next);
    }
  }

  // ── 2. Ungrouped runs: original immediate/silent logic ──
  const ungrouped = allRuns.filter(r => !r.groupId);

  // Immediate: announce per-completion
  const immediateReady = ungrouped.filter(
    r => !r.announced && isReady(r) && r.announce !== "silent",
  );
  if (immediateReady.length > 0) {
    announceRuns(requesterSessionId, immediateReady);
  }

  // Silent: announce only when ALL ungrouped silent runs are done
  const unannouncedSilent = ungrouped.filter(r => r.announce === "silent" && !r.announced);
  const silentReady = unannouncedSilent.filter(isReady);

  // All unannounced silent runs must be ready (ended + findings captured)
  if (silentReady.length > 0 && silentReady.length === unannouncedSilent.length) {
    announceRuns(requesterSessionId, silentReady);
  }
}
|
||||
|
||||
/** Announce a group of runs and mark them as announced. */
|
||||
function announceGroup(requesterSessionId: string, runs: SubagentRunRecord[]): void {
|
||||
const announced = runCoalescedAnnounceFlow(requesterSessionId, runs);
|
||||
/** Announce a batch of completed runs and mark them as announced. */
|
||||
function announceRuns(requesterSessionId: string, runs: SubagentRunRecord[], next?: string): void {
|
||||
const announced = runCoalescedAnnounceFlow(requesterSessionId, runs, next);
|
||||
|
||||
if (announced) {
|
||||
for (const r of runs) {
|
||||
|
|
@ -415,9 +487,18 @@ function sweep(): void {
|
|||
}
|
||||
}
|
||||
|
||||
// Clean up groups whose runs have all been archived
|
||||
for (const [groupId] of subagentGroups) {
|
||||
const hasActiveRuns = [...subagentRuns.values()].some(r => r.groupId === groupId);
|
||||
if (!hasActiveRuns) {
|
||||
subagentGroups.delete(groupId);
|
||||
removed++;
|
||||
}
|
||||
}
|
||||
|
||||
if (removed > 0) {
|
||||
persist();
|
||||
console.log(`[SubagentRegistry] Archived ${removed} completed run(s)`);
|
||||
console.log(`[SubagentRegistry] Archived ${removed} completed run(s)/group(s)`);
|
||||
}
|
||||
|
||||
if (subagentRuns.size === 0) {
|
||||
|
|
@ -431,7 +512,7 @@ function sweep(): void {
|
|||
|
||||
function persist(): void {
|
||||
try {
|
||||
saveSubagentRuns(subagentRuns);
|
||||
saveSubagentRuns(subagentRuns, subagentGroups);
|
||||
} catch (err) {
|
||||
console.error(`[SubagentRegistry] Failed to persist runs:`, err);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,26 @@ export type SubagentRunOutcome = {
|
|||
error?: string | undefined;
|
||||
};
|
||||
|
||||
/**
 * A set of subagent runs that are tracked and announced as one unit.
 *
 * Supports "collect all, then act" workflows: the parent receives the
 * combined results (plus the optional `next` continuation) only after every
 * run in the group has completed.
 */
export type SubagentGroup = {
  /** Unique identifier of the group (UUIDv7). */
  groupId: string;
  /** Session ID of the parent (requester) agent that owns the group. */
  requesterSessionId: string;
  /** Human-readable name for the group, if one was given. */
  label?: string | undefined;
  /**
   * Continuation prompt to run once all runs in the group have completed.
   * It is injected into the announcement so the parent agent acts on the
   * combined findings.
   */
  next?: string | undefined;
  /** Creation time of the group (timestamp). */
  createdAt: number;
};
|
||||
|
||||
/** Persistent record tracking a single subagent run */
|
||||
export type SubagentRunRecord = {
|
||||
/** Unique run identifier (UUIDv7) */
|
||||
|
|
@ -48,6 +68,9 @@ export type SubagentRunRecord = {
|
|||
/** Announcement mode: "immediate" (default) announces per-completion,
|
||||
* "silent" defers until all silent runs from the same requester complete. */
|
||||
announce?: "immediate" | "silent" | undefined;
|
||||
/** Group ID this run belongs to (if any). Runs in a group are announced
|
||||
* together when all complete, regardless of the `announce` field. */
|
||||
groupId?: string | undefined;
|
||||
};
|
||||
|
||||
/** Parameters for registering a new subagent run */
|
||||
|
|
@ -63,6 +86,12 @@ export type RegisterSubagentRunParams = {
|
|||
start?: (() => void) | undefined;
|
||||
/** Announcement mode: "immediate" (default) or "silent" (defer until all silent runs complete). */
|
||||
announce?: "immediate" | "silent" | undefined;
|
||||
/** Group ID to join. Runs in a group are announced together when all complete. */
|
||||
groupId?: string | undefined;
|
||||
/** Continuation prompt for the group. Only used on group creation (first spawn).
|
||||
* After all runs in the group complete, this prompt is included in the announcement
|
||||
* so the parent agent can act on the combined findings (e.g. summarize, write PDF). */
|
||||
next?: string | undefined;
|
||||
};
|
||||
|
||||
/** Parameters for the announce flow */
|
||||
|
|
|
|||
|
|
@ -262,23 +262,47 @@ export function buildConditionalToolSections(
|
|||
lines.push(
|
||||
"## Sub-Agents",
|
||||
"If a task is complex or long-running, spawn a sub-agent. It will do the work and report back when done.",
|
||||
"IMPORTANT: After spawning sub-agents, do NOT immediately check on them with sessions_list. " +
|
||||
"Results are delivered directly into your context automatically when the sub-agent finishes. " +
|
||||
"Continue with other tasks or finish your turn and wait for the results to arrive.",
|
||||
"You may use sessions_list to check on sub-agents only if a long time has passed or the user explicitly asks about their status.",
|
||||
"Sub-agents cannot spawn nested sub-agents.",
|
||||
"",
|
||||
"### Critical Rules",
|
||||
"- **NEVER fabricate, guess, or make up data that a sub-agent has not yet returned.** " +
|
||||
"This includes completion status — do NOT claim tasks are done until you receive actual results.",
|
||||
"- After spawning, do NOT proceed with work that depends on the sub-agent results. " +
|
||||
"You can still chat with the user, do unrelated tasks, or explain what the sub-agents are working on.",
|
||||
"- Sub-agents cannot spawn nested sub-agents.",
|
||||
"- You can use `sessions_list` to check sub-agent status if needed.",
|
||||
"",
|
||||
"### Groups and Continuation (`next`) — ALWAYS use for multi-agent tasks",
|
||||
"When spawning multiple sub-agents, **always** use `next` to define the follow-up work. " +
|
||||
"This is the standard pattern — do NOT use bare `announce: \"silent\"` for multi-agent collect-then-act workflows.",
|
||||
"",
|
||||
"```",
|
||||
"// First spawn — creates a group automatically, returns groupId",
|
||||
'sessions_spawn({ task: "Get AAPL financials", next: "Summarize all data and write a PDF report", label: "AAPL" })',
|
||||
"// → { groupId: \"grp-abc\", runId: \"...\" }",
|
||||
"",
|
||||
"// Subsequent spawns — join the same group",
|
||||
'sessions_spawn({ task: "Get MSFT financials", groupId: "grp-abc", label: "MSFT" })',
|
||||
'sessions_spawn({ task: "Get GOOG financials", groupId: "grp-abc", label: "GOOG" })',
|
||||
"```",
|
||||
"",
|
||||
"The system waits for ALL runs in the group to complete, then delivers the combined findings " +
|
||||
"plus the `next` continuation prompt back to you. You can then use tools (write files, call APIs, etc.) " +
|
||||
"to complete the follow-up work. The user is NOT blocked during this process — they can keep chatting.",
|
||||
"",
|
||||
"Use `next` whenever the user's request involves: collect data → then act on it (summarize, analyze, generate files).",
|
||||
"Without `next`, findings are summarized but no further action is taken.",
|
||||
"",
|
||||
"### Announce Modes (when not using groups)",
|
||||
"- `announce: \"immediate\"` (default): findings delivered per sub-agent as each completes.",
|
||||
"- `announce: \"silent\"`: all findings held until every silent sub-agent finishes, then delivered together.",
|
||||
"Groups always use silent collection internally — you don't need to set announce when using groupId.",
|
||||
"",
|
||||
"### Timeout Guidelines",
|
||||
"Set timeoutSeconds generously — a sub-agent that times out loses all its work.",
|
||||
"- Simple tasks (search, read, summarize): 600 (10 min, the default)",
|
||||
"- Moderate tasks (multi-step research, file downloads + analysis): 900–1200 (15–20 min)",
|
||||
"- Complex tasks (code generation, PDF creation, multi-file operations): 1200–1800 (20–30 min)",
|
||||
"When in doubt, use a longer timeout. It is always better to wait longer than to lose completed work.",
|
||||
"",
|
||||
"### Announce Modes",
|
||||
"- `announce: \"immediate\"` (default): Each sub-agent's findings are delivered to you as soon as it completes.",
|
||||
"- `announce: \"silent\"`: All findings are held back until every silent sub-agent finishes, then delivered as ONE combined report.",
|
||||
"Use \"silent\" when you want to collect data from multiple sub-agents first, then summarize everything at once.",
|
||||
"When in doubt, use a longer timeout.",
|
||||
"",
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
import { Type } from "@sinclair/typebox";
|
||||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import { listSubagentRuns, getSubagentRun } from "../subagent/registry.js";
|
||||
import { listSubagentRuns, getSubagentRun, getSubagentGroup } from "../subagent/registry.js";
|
||||
import type { SubagentRunRecord } from "../subagent/types.js";
|
||||
|
||||
const SessionsListSchema = Type.Object({
|
||||
|
|
@ -79,6 +79,11 @@ function formatRunDetail(record: SubagentRunRecord, now: number): string {
|
|||
];
|
||||
|
||||
if (record.label) lines.push(`Label: ${record.label}`);
|
||||
if (record.groupId) {
|
||||
const group = getSubagentGroup(record.groupId);
|
||||
lines.push(`Group: ${record.groupId}${group?.label ? ` (${group.label})` : ""}`);
|
||||
if (group?.next) lines.push(`Continuation: ${group.next.slice(0, 120)}${group.next.length > 120 ? "…" : ""}`);
|
||||
}
|
||||
lines.push(`Task: ${record.task}`);
|
||||
lines.push(`Status: ${status}${record.outcome?.error ? ` — ${record.outcome.error}` : ""}`);
|
||||
lines.push(`Child Session: ${record.childSessionId}`);
|
||||
|
|
@ -128,8 +133,7 @@ export function createSessionsListTool(
|
|||
description:
|
||||
"List all subagent runs spawned by this session and their current status. " +
|
||||
"Optionally pass a runId to get detailed information about a specific run. " +
|
||||
"NOTE: Do NOT call this immediately after spawning subagents — results arrive automatically in your context when subagents complete. " +
|
||||
"Only use this if a long time has passed or the user explicitly asks about subagent status.",
|
||||
"Use this to check subagent progress or when the user asks about status.",
|
||||
parameters: SessionsListSchema,
|
||||
execute: async (_toolCallId, args) => {
|
||||
const { runId } = args as SessionsListArgs;
|
||||
|
|
@ -177,21 +181,59 @@ export function createSessionsListTool(
|
|||
|
||||
const someRunning = runs.some((r) => !r.endedAt);
|
||||
|
||||
// Build status lines for each run
|
||||
// Build status lines, grouping runs by groupId
|
||||
const statusLines: string[] = [];
|
||||
for (let i = 0; i < runs.length; i++) {
|
||||
const r = runs[i]!;
|
||||
const groupedRuns = new Map<string, SubagentRunRecord[]>();
|
||||
const ungroupedRuns: SubagentRunRecord[] = [];
|
||||
|
||||
for (const r of runs) {
|
||||
if (r.groupId) {
|
||||
const list = groupedRuns.get(r.groupId) ?? [];
|
||||
list.push(r);
|
||||
groupedRuns.set(r.groupId, list);
|
||||
} else {
|
||||
ungroupedRuns.push(r);
|
||||
}
|
||||
}
|
||||
|
||||
let idx = 0;
|
||||
|
||||
// Grouped runs
|
||||
for (const [gId, gRuns] of groupedRuns) {
|
||||
const group = getSubagentGroup(gId);
|
||||
const groupLabel = group?.label || `Group ${gId.slice(0, 8)}…`;
|
||||
const done = gRuns.filter(r => r.endedAt).length;
|
||||
const nextSnippet = group?.next ? ` → next: "${group.next.slice(0, 60)}${group.next.length > 60 ? "…" : ""}"` : "";
|
||||
statusLines.push(`\n 📦 ${groupLabel} (${done}/${gRuns.length} done${nextSnippet})`);
|
||||
|
||||
for (const r of gRuns) {
|
||||
idx++;
|
||||
const displayName = r.label || r.task.slice(0, 60);
|
||||
const status = resolveStatus(r);
|
||||
if (status === "running") {
|
||||
const elapsed = r.startedAt ? formatElapsed(now - r.startedAt) : "just spawned";
|
||||
statusLines.push(` ${idx}. [RUNNING] "${displayName}" (${elapsed})`);
|
||||
} else {
|
||||
const elapsed = r.startedAt && r.endedAt ? formatElapsed(r.endedAt - r.startedAt) : "";
|
||||
statusLines.push(` ${idx}. [${status.toUpperCase()}] "${displayName}" (${elapsed})`);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Ungrouped runs
|
||||
for (const r of ungroupedRuns) {
|
||||
idx++;
|
||||
const displayName = r.label || r.task.slice(0, 60);
|
||||
const status = resolveStatus(r);
|
||||
if (status === "running") {
|
||||
const elapsed = r.startedAt ? formatElapsed(now - r.startedAt) : "just spawned";
|
||||
statusLines.push(` ${i + 1}. [RUNNING] "${displayName}" (${elapsed})`);
|
||||
statusLines.push(` ${idx}. [RUNNING] "${displayName}" (${elapsed})`);
|
||||
} else {
|
||||
const elapsed = r.startedAt && r.endedAt ? formatElapsed(r.endedAt - r.startedAt) : "";
|
||||
const findings = r.findingsCaptured
|
||||
? (r.findings ? r.findings.slice(0, 200) + (r.findings.length > 200 ? "…" : "") : "(no output)")
|
||||
: "(findings not yet captured)";
|
||||
statusLines.push(` ${i + 1}. [${status.toUpperCase()}] "${displayName}" (${elapsed})\n Findings: ${findings}`);
|
||||
statusLines.push(` ${idx}. [${status.toUpperCase()}] "${displayName}" (${elapsed})\n Findings: ${findings}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ import { Type } from "@sinclair/typebox";
|
|||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import { getHub } from "../../hub/hub-singleton.js";
|
||||
import { buildSubagentSystemPrompt } from "../subagent/announce.js";
|
||||
import { registerSubagentRun } from "../subagent/registry.js";
|
||||
import { registerSubagentRun, createSubagentGroup, getSubagentGroup } from "../subagent/registry.js";
|
||||
import { resolveTools } from "../tools.js";
|
||||
|
||||
const SessionsSpawnSchema = Type.Object({
|
||||
|
|
@ -41,7 +41,26 @@ const SessionsSpawnSchema = Type.Object({
|
|||
"Announcement mode. 'immediate' (default): findings delivered as each subagent completes. " +
|
||||
"'silent': defer all announcements until every silent subagent from this session finishes, " +
|
||||
"then deliver one combined report. Use 'silent' when spawning multiple subagents to collect " +
|
||||
"data in parallel and you want to summarize everything at once.",
|
||||
"data in parallel and you want to summarize everything at once. " +
|
||||
"Ignored when groupId is provided (groups always collect all results before announcing).",
|
||||
}),
|
||||
),
|
||||
groupId: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
"Join an existing group. Pass the groupId returned by a previous sessions_spawn call " +
|
||||
"to add this subagent to the same group. All runs in a group are announced together " +
|
||||
"when the last one completes. If omitted AND 'next' is provided, a new group is created automatically.",
|
||||
}),
|
||||
),
|
||||
next: Type.Optional(
|
||||
Type.String({
|
||||
description:
|
||||
"Continuation task to execute after ALL subagents in the group complete. " +
|
||||
"Only used when creating a new group (first spawn without groupId). " +
|
||||
"When set, the combined findings from all subagents plus this 'next' prompt " +
|
||||
"are delivered to you so you can perform follow-up work (e.g. summarize, generate reports, write files). " +
|
||||
"Setting 'next' automatically creates a group and implies silent collection.",
|
||||
}),
|
||||
),
|
||||
});
|
||||
|
|
@ -53,12 +72,15 @@ type SessionsSpawnArgs = {
|
|||
cleanup?: "delete" | "keep";
|
||||
timeoutSeconds?: number;
|
||||
announce?: "immediate" | "silent";
|
||||
groupId?: string;
|
||||
next?: string;
|
||||
};
|
||||
|
||||
/** Structured `details` payload returned by the sessions_spawn tool. */
export type SessionsSpawnResult = {
  /** "accepted" when the subagent was spawned, "error" otherwise. */
  status: "accepted" | "error";
  /** Session ID of the spawned child agent (reported on success). */
  childSessionId?: string;
  /** Identifier of the registered run (reported on success). */
  runId?: string;
  /** Group the run belongs to, when it joined or created one. */
  groupId?: string;
  /** Error description (reported on failure). */
  error?: string;
};
|
||||
|
||||
|
|
@ -79,13 +101,15 @@ export function createSessionsSpawnTool(
|
|||
label: "Spawn Subagent",
|
||||
description:
|
||||
"Spawn a background subagent to handle a specific task. The subagent runs in an isolated session with its own tool set. " +
|
||||
"When it completes, its findings are delivered directly into your context automatically — you do NOT need to poll or check. " +
|
||||
"IMPORTANT: After spawning subagents, continue with any other immediate tasks you have, or simply finish your turn and wait. " +
|
||||
"Do NOT call sessions_list to check on subagents you just spawned — results take time and will arrive on their own. " +
|
||||
"When it completes, its findings are delivered directly into your context automatically. " +
|
||||
"After spawning, do NOT proceed with work that depends on the results — but you can still chat or do unrelated tasks. " +
|
||||
"When spawning multiple subagents for a collect-then-act workflow, ALWAYS use the `next` parameter " +
|
||||
"on the first spawn to define follow-up work, then pass the returned groupId to subsequent spawns. " +
|
||||
"Use this for parallelizable work, long-running analysis, or tasks that benefit from isolation.",
|
||||
parameters: SessionsSpawnSchema,
|
||||
execute: async (_toolCallId, args) => {
|
||||
const { task, label, model, cleanup = "delete", timeoutSeconds, announce } = args as SessionsSpawnArgs;
|
||||
const { task, label, model, cleanup = "delete", timeoutSeconds, announce, next } = args as SessionsSpawnArgs;
|
||||
let { groupId } = args as SessionsSpawnArgs;
|
||||
|
||||
// Guard: subagents cannot spawn subagents
|
||||
if (options.isSubagent) {
|
||||
|
|
@ -102,6 +126,28 @@ export function createSessionsSpawnTool(
|
|||
const runId = uuidv7();
|
||||
const childSessionId = uuidv7();
|
||||
|
||||
// Validate groupId if provided
|
||||
if (groupId) {
|
||||
const existingGroup = getSubagentGroup(groupId);
|
||||
if (!existingGroup) {
|
||||
return {
|
||||
content: [{ type: "text", text: `Error: group not found: ${groupId}. Use the groupId returned by a previous sessions_spawn call.` }],
|
||||
details: { status: "error", error: `group not found: ${groupId}` },
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
// Auto-create group when `next` is provided without an existing groupId
|
||||
if (!groupId && next) {
|
||||
groupId = uuidv7();
|
||||
createSubagentGroup({
|
||||
groupId,
|
||||
requesterSessionId,
|
||||
label: label ? `Group: ${label}` : undefined,
|
||||
next,
|
||||
});
|
||||
}
|
||||
|
||||
// Resolve tools for the subagent (with isSubagent=true for policy filtering)
|
||||
const subagentTools = resolveTools({ isSubagent: true });
|
||||
const toolNames = subagentTools.map((t) => t.name);
|
||||
|
|
@ -135,21 +181,27 @@ export function createSessionsSpawnTool(
|
|||
label,
|
||||
cleanup,
|
||||
timeoutSeconds,
|
||||
announce,
|
||||
announce: groupId ? "silent" : announce,
|
||||
groupId,
|
||||
start: () => childAgent.write(task),
|
||||
});
|
||||
|
||||
// Build response text
|
||||
const groupInfo = groupId ? `\nGroup: ${groupId}` : "";
|
||||
const nextInfo = next ? `\nContinuation: "${next.slice(0, 100)}${next.length > 100 ? "…" : ""}"` : "";
|
||||
const responseText =
|
||||
`Subagent spawned: ${label || task.slice(0, 80)}\n` +
|
||||
`Run: ${runId}${groupInfo}${nextInfo}\n\n` +
|
||||
`⏳ WAITING FOR RESULTS — do NOT proceed with work that depends on these results.\n` +
|
||||
`Do NOT fabricate data or completion status. Results will arrive in your context automatically.`;
|
||||
|
||||
return {
|
||||
content: [
|
||||
{
|
||||
type: "text",
|
||||
text: `Subagent spawned successfully.\n\nRun ID: ${runId}\nSession: ${childSessionId}\nTask: ${label || task.slice(0, 80)}\n\nThe subagent is now working in the background. Its findings will be delivered directly into your context when it completes — do NOT poll or call sessions_list for it. Continue with other tasks or finish your turn.`,
|
||||
},
|
||||
],
|
||||
content: [{ type: "text", text: responseText }],
|
||||
details: {
|
||||
status: "accepted",
|
||||
childSessionId,
|
||||
runId,
|
||||
groupId,
|
||||
},
|
||||
};
|
||||
} catch (err) {
|
||||
|
|
|
|||
|
|
@ -9,13 +9,22 @@ import {
|
|||
type AgentMessageItem,
|
||||
type ExecApprovalRequestPayload,
|
||||
type ApprovalDecision,
|
||||
type CompactionEndEvent,
|
||||
} from "@multica/sdk";
|
||||
|
||||
export type ToolStatus = "running" | "success" | "error" | "interrupted";
|
||||
|
||||
/** Statistics attached to a "compaction" system message. */
export interface CompactionInfo {
  /** Number of messages removed by the compaction. */
  removed: number;
  /** Count reported as `kept` by the compaction event (presumably messages kept). */
  kept: number;
  /** Tokens removed by the compaction, when the event reports it. */
  tokensRemoved?: number;
  /** Tokens kept after the compaction, when the event reports it. */
  tokensKept?: number;
  /** Reason string carried over from the compaction event. */
  reason: string;
}
|
||||
|
||||
export interface Message {
|
||||
id: string;
|
||||
role: "user" | "assistant" | "toolResult";
|
||||
role: "user" | "assistant" | "toolResult" | "system";
|
||||
content: ContentBlock[];
|
||||
agentId: string;
|
||||
stopReason?: string;
|
||||
|
|
@ -24,6 +33,8 @@ export interface Message {
|
|||
toolArgs?: Record<string, unknown>;
|
||||
toolStatus?: ToolStatus;
|
||||
isError?: boolean;
|
||||
systemType?: "compaction";
|
||||
compaction?: CompactionInfo;
|
||||
}
|
||||
|
||||
export interface ChatError {
|
||||
|
|
@ -215,6 +226,27 @@ export function useChat() {
|
|||
}
|
||||
case "tool_execution_update":
|
||||
break;
|
||||
case "compaction_end": {
|
||||
const ce = event as CompactionEndEvent;
|
||||
setMessages((prev) => [
|
||||
...prev,
|
||||
{
|
||||
id: uuidv7(),
|
||||
role: "system",
|
||||
content: [],
|
||||
agentId: payload.agentId,
|
||||
systemType: "compaction",
|
||||
compaction: {
|
||||
removed: ce.removed,
|
||||
kept: ce.kept,
|
||||
tokensRemoved: ce.tokensRemoved,
|
||||
tokensKept: ce.tokensKept,
|
||||
reason: ce.reason,
|
||||
},
|
||||
},
|
||||
]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}, []);
|
||||
|
||||
|
|
|
|||
|
|
@ -2,9 +2,17 @@ import type { ContentBlock } from "@multica/sdk"
|
|||
|
||||
export type ToolStatus = "running" | "success" | "error" | "interrupted"
|
||||
|
||||
/** Statistics attached to a "compaction" system message. */
export interface CompactionInfo {
  /** Number of messages removed by the compaction. */
  removed: number
  /** Count reported as `kept` for the compaction (presumably messages kept). */
  kept: number
  /** Tokens removed by the compaction, when known. */
  tokensRemoved?: number
  /** Tokens kept after the compaction, when known. */
  tokensKept?: number
  /** Reason string describing why/how the compaction happened. */
  reason: string
}
|
||||
|
||||
export interface Message {
|
||||
id: string
|
||||
role: "user" | "assistant" | "toolResult"
|
||||
role: "user" | "assistant" | "toolResult" | "system"
|
||||
content: ContentBlock[]
|
||||
agentId: string
|
||||
stopReason?: string
|
||||
|
|
@ -13,4 +21,6 @@ export interface Message {
|
|||
toolArgs?: Record<string, unknown>
|
||||
toolStatus?: ToolStatus
|
||||
isError?: boolean
|
||||
systemType?: "compaction"
|
||||
compaction?: CompactionInfo
|
||||
}
|
||||
|
|
|
|||
45
packages/ui/src/components/compaction-item.tsx
Normal file
45
packages/ui/src/components/compaction-item.tsx
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
"use client"
|
||||
|
||||
import { memo } from "react"
|
||||
import { Scissors } from "lucide-react"
|
||||
import type { Message } from "@multica/store"
|
||||
|
||||
/**
 * Format a token count for compact display.
 *
 * - below 1,000: the plain number (e.g. `"512"`)
 * - 1,000 up to 999,949: approximate thousands with one decimal (e.g. `"~1.5k"`)
 * - 999,950 and above: approximate millions with one decimal (e.g. `"~2.5M"`)
 *
 * @param n Token count to format.
 * @returns The display string.
 */
function formatTokens(n: number): string {
  if (n < 1000) return `${n}`
  // From 999,950 upward the one-decimal "k" form would round to "1000.0k",
  // so switch to the "M" tier there instead.
  if (n < 999950) return `~${(n / 1000).toFixed(1)}k`
  return `~${(n / 1e6).toFixed(1)}M`
}
|
||||
|
||||
/** Props for {@link CompactionItem}. */
interface CompactionItemProps {
  /** Message whose `compaction` payload supplies the stats to display. */
  message: Message
}

/**
 * Single-line notice shown in the message list when the context was compacted.
 *
 * Renders nothing when the message carries no `compaction` payload.
 * The label reads "Context summarized" when the reason is `"summary"` and
 * "Context compacted" otherwise; the stats show how many messages were
 * removed and, when available, roughly how many tokens were freed.
 */
export const CompactionItem = memo(function CompactionItem({ message }: CompactionItemProps) {
  const info = message.compaction
  if (!info) return null

  const label = info.reason === "summary" ? "Context summarized" : "Context compacted"
  // Pluralize so a single removed message does not read "1 messages removed".
  const noun = info.removed === 1 ? "message" : "messages"
  const removed = `${info.removed} ${noun} removed`
  const tokens = info.tokensRemoved != null
    ? `, ${formatTokens(info.tokensRemoved)} tokens freed`
    : ""

  return (
    <div className="py-0.5 px-2.5 text-sm text-muted-foreground">
      <div className="flex items-center gap-1.5 px-2.5 py-1">
        {/* Status dot */}
        <span className="size-1.5 rounded-full shrink-0 bg-muted-foreground/40" />

        {/* Icon */}
        <Scissors className="size-3.5 shrink-0" />

        {/* Label */}
        <span className="font-medium shrink-0">{label}</span>

        {/* Stats */}
        <span className="ml-auto text-xs text-muted-foreground/60 shrink-0">
          {removed}{tokens}
        </span>
      </div>
    </div>
  )
})
|
||||
|
|
@ -5,6 +5,7 @@ import { MemoizedMarkdown } from "@multica/ui/components/markdown";
|
|||
import { StreamingMarkdown } from "@multica/ui/components/markdown/StreamingMarkdown";
|
||||
import { ToolCallItem } from "@multica/ui/components/tool-call-item";
|
||||
import { ThinkingItem } from "@multica/ui/components/thinking-item";
|
||||
import { CompactionItem } from "@multica/ui/components/compaction-item";
|
||||
import { cn, getTextContent } from "@multica/ui/lib/utils";
|
||||
import type { Message } from "@multica/store";
|
||||
import type { ContentBlock, ToolCall, ThinkingContent } from "@multica/sdk";
|
||||
|
|
@ -78,6 +79,11 @@ export const MessageList = memo(function MessageList({ messages, streamingIds }:
|
|||
return (
|
||||
<div className="relative p-6 px-4 sm:px-10 max-w-4xl mx-auto">
|
||||
{messages.map((msg) => {
|
||||
// System messages (e.g. compaction notifications)
|
||||
if (msg.role === "system") {
|
||||
return <CompactionItem key={msg.id} message={msg} />
|
||||
}
|
||||
|
||||
// ToolResult messages → render as tool execution item
|
||||
if (msg.role === "toolResult") {
|
||||
return <ToolCallItem key={msg.id} message={msg} />
|
||||
|
|
|
|||
463
skills/earnings-analysis/SKILL.md
Normal file
463
skills/earnings-analysis/SKILL.md
Normal file
|
|
@ -0,0 +1,463 @@
|
|||
---
|
||||
name: Earnings Analysis
|
||||
description: >-
|
||||
Analyze a company's financial statements (income statement, balance sheet,
|
||||
cash flow statement) to assess financial health, earnings quality, and
|
||||
competitive advantage. Use when the user asks to read/analyze financial
|
||||
statements, check earnings quality, assess financial health, evaluate
|
||||
profitability trends, or screen for competitive moats.
|
||||
version: 1.0.0
|
||||
metadata:
|
||||
emoji: "\U0001F4D1"
|
||||
requires:
|
||||
env:
|
||||
- FINANCIAL_DATASETS_API_KEY
|
||||
tags:
|
||||
- finance
|
||||
- earnings
|
||||
- analysis
|
||||
- statements
|
||||
- buffett
|
||||
userInvocable: true
|
||||
disableModelInvocation: false
|
||||
---
|
||||
|
||||
## Instructions
|
||||
|
||||
You are performing a structured financial statement analysis. Follow all steps in order and show your work. Output language must match the user's input language.
|
||||
|
||||
**IMPORTANT: This analysis requires BOTH structured data AND external context.** You MUST use `web_search` to gather earnings call insights, industry context, and explanations for data anomalies. An analysis based only on API data without any web research is incomplete. Expect to make 3-8 web searches throughout the analysis.
|
||||
|
||||
### Progress Checklist
|
||||
|
||||
```
|
||||
Earnings Analysis Progress:
|
||||
- [ ] Step 1: Gather financial data
|
||||
- [ ] Step 2: Income statement analysis
|
||||
- [ ] Step 3: Balance sheet analysis
|
||||
- [ ] Step 4: Cash flow statement analysis
|
||||
- [ ] Step 5: Buffett competitive advantage scoring
|
||||
- [ ] Step 6: Quality of earnings assessment
|
||||
- [ ] Step 7: SEC filing qualitative analysis
|
||||
- [ ] Step 8: Peer comparison (if requested)
|
||||
- [ ] Step 9: Present findings
|
||||
```
|
||||
|
||||
### Step 1: Gather Financial Data
|
||||
|
||||
Use the `data` tool with `domain="finance"` for all structured data calls.
|
||||
|
||||
#### 1a. Structured Data
|
||||
|
||||
1. **Annual financial statements** (5 years):
|
||||
```
|
||||
action: "get_all_financial_statements"
|
||||
params: { ticker: "[TICKER]", period: "annual", limit: 5 }
|
||||
```
|
||||
This returns income statements, balance sheets, and cash flow statements together.
|
||||
|
||||
2. **Quarterly financial statements** (last 4 quarters):
|
||||
```
|
||||
action: "get_all_financial_statements"
|
||||
params: { ticker: "[TICKER]", period: "quarterly", limit: 4 }
|
||||
```
|
||||
|
||||
3. **Current financial metrics**:
|
||||
```
|
||||
action: "get_financial_metrics_snapshot"
|
||||
params: { ticker: "[TICKER]" }
|
||||
```
|
||||
|
||||
4. **Company facts**:
|
||||
```
|
||||
action: "get_company_facts"
|
||||
params: { ticker: "[TICKER]" }
|
||||
```
|
||||
Extract: `sector`, `industry` — needed for benchmark comparisons in later steps.
|
||||
|
||||
5. **Current stock price**:
|
||||
```
|
||||
action: "get_price_snapshot"
|
||||
params: { ticker: "[TICKER]" }
|
||||
```
|
||||
|
||||
6. **Recent news**:
|
||||
```
|
||||
action: "get_news"
|
||||
params: { ticker: "[TICKER]", limit: 10 }
|
||||
```
|
||||
Scan headlines for material events (earnings surprises, guidance changes, M&A, restructuring).
|
||||
|
||||
#### 1b. External Context (Web Search) — MANDATORY
|
||||
|
||||
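You MUST run the first two web searches below (earnings call highlights and industry/macro backdrop) after gathering structured data. These are not optional. The third search is conditional and runs only when a material event is detected.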
|
||||
|
||||
1. **Latest earnings call highlights** (REQUIRED):
|
||||
```
|
||||
web_search("[COMPANY] latest earnings call highlights key takeaways [CURRENT_YEAR]")
|
||||
```
|
||||
Extract: management guidance, segment commentary, strategic priorities, forward outlook.
|
||||
This provides the "why" behind the numbers that structured data cannot explain.
|
||||
|
||||
2. **Industry/macro backdrop** (REQUIRED):
|
||||
```
|
||||
web_search("[INDUSTRY] industry outlook trends [CURRENT_YEAR]")
|
||||
```
|
||||
Extract: industry growth rate, tailwinds/headwinds, regulatory changes, competitive dynamics.
|
||||
This is needed to assess whether the company's performance is company-specific or industry-wide.
|
||||
|
||||
3. **Company-specific events** (conditional — run if news headlines or data show a material event):
|
||||
```
|
||||
web_search("[COMPANY] [EVENT_KEYWORD] impact analysis")
|
||||
```
|
||||
Examples: acquisition, restructuring, product launch, lawsuit, management change.
|
||||
|
||||
**Checkpoint:** Before proceeding to Step 2, verify that you have completed at least 2 web searches above. If you have not, go back and run them now.
|
||||
|
||||
### Step 2: Income Statement Analysis
|
||||
|
||||
Analyze the income statement across all 5 annual periods. Calculate and present:
|
||||
|
||||
1. **Revenue trend**:
|
||||
- Year-over-year growth rate for each year
|
||||
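- 5-year CAGR: `(Revenue_latest / Revenue_earliest)^(1/years) - 1`, where `years` is the number of years elapsed between the earliest and latest periods (4 when using 5 annual statements, not 5)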
|
||||
- Flag any years with revenue decline
|
||||
|
||||
2. **Margin analysis** (calculate for each year, show the trend):
|
||||
- Gross Margin = Gross Profit / Revenue
|
||||
- Operating Margin = Operating Income / Revenue
|
||||
- Net Margin = Net Income / Revenue
|
||||
|
||||
3. **Margin benchmarks** (from [financial-ratios-benchmarks.md](references/financial-ratios-benchmarks.md)):
|
||||
- Compare each margin to sector benchmarks
|
||||
- Flag margins that are significantly above or below sector range
|
||||
|
||||
4. **EPS analysis**:
|
||||
- EPS trend over 5 years
|
||||
- EPS growth consistency (note any years of decline)
|
||||
|
||||
5. **Expense structure**:
|
||||
- Cost of revenue as % of revenue (trend)
|
||||
- SG&A as % of revenue (trend)
|
||||
- R&D as % of revenue (trend, if applicable)
|
||||
- Flag any expense category growing faster than revenue
|
||||
|
||||
6. **Contextual explanation** (REQUIRED — use web search results from Step 1b):
|
||||
- For each significant trend or inflection point in the data above, provide a **why** explanation using the earnings call and industry context gathered in Step 1b.
|
||||
- If revenue growth changed direction significantly (acceleration or deceleration > 10pp), run an additional search:
|
||||
`web_search("[COMPANY] revenue [growth/decline] reason [YEAR]")`
|
||||
- If margins shifted by more than 5pp year-over-year, run an additional search:
|
||||
`web_search("[COMPANY] margin [expansion/compression] [YEAR]")`
|
||||
- **Do not present a data table without narrative.** Every major trend must have a "why" attached, citing the source (earnings call, industry report, or company announcement).
|
||||
|
||||
Present as a table:
|
||||
|
||||
| Metric | Year 1 | Year 2 | Year 3 | Year 4 | Year 5 | 5Y CAGR |
|
||||
|--------|--------|--------|--------|--------|--------|---------|
|
||||
|
||||
### Step 3: Balance Sheet Analysis
|
||||
|
||||
Analyze the balance sheet across all 5 annual periods:
|
||||
|
||||
1. **Liquidity**:
|
||||
- Current Ratio = Current Assets / Current Liabilities
|
||||
- Quick Ratio = (Current Assets - Inventory) / Current Liabilities
|
||||
- Cash and equivalents trend
|
||||
|
||||
2. **Leverage**:
|
||||
- Cash vs. Total Debt (short-term + long-term debt)
|
||||
- Debt-to-Equity = Total Liabilities / Total Shareholders' Equity
|
||||
- Interest Coverage = Operating Income / Interest Expense
|
||||
- Debt payoff capacity = Total Debt / Net Income (in years)
|
||||
|
||||
3. **Asset quality**:
|
||||
- Receivables Turnover = Revenue / Accounts Receivable
|
||||
- Inventory Turnover = Cost of Revenue / Inventory (if applicable)
|
||||
- Goodwill as % of Total Assets (flag if > 30%)
|
||||
|
||||
4. **Equity structure**:
|
||||
- Retained earnings: year-over-year changes (growing?)
|
||||
- Preferred stock: present or absent?
|
||||
- Treasury stock: present? growing? (indicates buybacks)
|
||||
|
||||
5. **Working capital trend**:
|
||||
- Net Working Capital = Current Assets - Current Liabilities
|
||||
- Direction of change over 5 years
|
||||
|
||||
6. **Contextual explanation** (use web search results from Step 1b + additional searches as needed):
|
||||
- Explain major balance sheet changes using earnings call context from Step 1b.
|
||||
- If total debt changed significantly (> 30% YoY), you MUST search for the reason:
|
||||
`web_search("[COMPANY] debt [issuance/repayment] [YEAR]")`
|
||||
- If goodwill jumped, you MUST search for acquisition context:
|
||||
`web_search("[COMPANY] acquisition [YEAR]")`
|
||||
- Large treasury stock changes → confirm buyback program details:
|
||||
`web_search("[COMPANY] share buyback program")`
|
||||
|
||||
Compare key ratios to sector benchmarks from [financial-ratios-benchmarks.md](references/financial-ratios-benchmarks.md).
|
||||
|
||||
### Step 4: Cash Flow Statement Analysis
|
||||
|
||||
Analyze cash flow statements across all 5 annual periods:
|
||||
|
||||
1. **Operating cash flow quality**:
|
||||
- OCF vs. Net Income ratio for each year
|
||||
- Target: OCF/NI > 1.0 (cash earnings exceed accrual earnings)
|
||||
- Trend direction
|
||||
|
||||
2. **Free cash flow**:
|
||||
- FCF = Operating Cash Flow - Capital Expenditure
|
||||
- FCF Margin = FCF / Revenue
|
||||
- 5-year FCF trend and CAGR
|
||||
|
||||
3. **Capital intensity**:
|
||||
- CapEx / Revenue ratio
|
||||
- CapEx / Net Income ratio (Buffett benchmark: < 25% excellent, < 50% acceptable)
|
||||
- Is CapEx growing faster than revenue? (potential red flag)
|
||||
|
||||
4. **Cash flow composition**:
|
||||
- Net cash from operating activities (should be consistently positive)
|
||||
- Net cash from investing activities (negative = investing in growth)
|
||||
- Net cash from financing activities (pattern: debt vs. equity funded?)
|
||||
|
||||
5. **Shareholder returns**:
|
||||
- Dividends paid (from financing activities)
|
||||
- Share buybacks / treasury stock repurchase
|
||||
- Total payout ratio = (Dividends + Buybacks) / Net Income
|
||||
- Is the company returning cash while maintaining growth?
|
||||
|
||||
6. **Contextual explanation** (use web search results from Step 1b + additional searches as needed):
|
||||
- Explain cash flow patterns using earnings call context from Step 1b.
|
||||
- If CapEx spiked significantly in a particular year, you MUST search for what was built:
|
||||
`web_search("[COMPANY] capital expenditure investment [YEAR]")`
|
||||
- If FCF diverged sharply from net income, search for restructuring or working capital events.
|
||||
|
||||
Present a summary table:
|
||||
|
||||
| Metric | Year 1 | Year 2 | Year 3 | Year 4 | Year 5 |
|
||||
|--------|--------|--------|--------|--------|--------|
|
||||
|
||||
### Step 5: Buffett Competitive Advantage Scoring
|
||||
|
||||
Apply the scoring framework from [buffett-checklist.md](references/buffett-checklist.md).
|
||||
|
||||
For each of the 13 criteria across 4 categories:
|
||||
1. Calculate the metric value from the data gathered in Steps 1-4
|
||||
2. Determine the score based on the threshold table
|
||||
3. Note the sector-specific caveats (Financials, Utilities, REITs, Growth-stage)
|
||||
|
||||
Present the full scorecard table and the overall rating (Excellent / Good / Average / Weak).
|
||||
|
||||
### Step 6: Quality of Earnings Assessment
|
||||
|
||||
Assess whether reported earnings are backed by real cash and sustainable operations:
|
||||
|
||||
1. **Accrual ratio**:
|
||||
- Formula: (Net Income - Operating Cash Flow) / Total Assets
|
||||
- Interpretation: Lower is better. High positive values suggest earnings are driven by accruals rather than cash.
|
||||
- Red flag threshold: > 10%
|
||||
|
||||
2. **Revenue recognition quality**:
|
||||
- Compare Accounts Receivable growth rate vs. Revenue growth rate
|
||||
- If AR grows significantly faster than revenue → potential aggressive revenue recognition
|
||||
- Red flag threshold: AR growth > Revenue growth + 5 percentage points
|
||||
|
||||
3. **Inventory quality** (if applicable):
|
||||
- Compare Inventory growth rate vs. Cost of Revenue growth rate
|
||||
- Rising inventory vs. flat/declining COGS → potential obsolescence risk
|
||||
- Red flag threshold: Inventory growth > COGS growth + 10 percentage points
|
||||
|
||||
4. **One-time items**:
|
||||
- Identify significant non-recurring charges or gains in the income statement
|
||||
- Calculate adjusted net income excluding one-time items
|
||||
- Compare adjusted vs. reported margins
|
||||
|
||||
5. **Deferred revenue trend** (if applicable):
|
||||
- Growing deferred revenue is a positive signal (future revenue already contracted)
|
||||
- Declining deferred revenue may signal weakening demand pipeline
|
||||
|
||||
6. **External validation** (web search):
|
||||
- If any red flags were triggered above, search for corroborating or mitigating context:
|
||||
`web_search("[COMPANY] accounting concerns OR restatement OR SEC inquiry")`
|
||||
- Check for auditor changes (can signal accounting issues):
|
||||
`web_search("[COMPANY] auditor change OR audit opinion")`
|
||||
- Only run these searches if quantitative red flags exist. Do not search proactively for every company.
|
||||
|
||||
Summarize quality of earnings as: **High** / **Moderate** / **Low** with supporting evidence.
|
||||
|
||||
### Step 7: SEC Filing Qualitative Analysis
|
||||
|
||||
Pull and analyze the most recent annual or quarterly filing:
|
||||
|
||||
1. **Get filing list**:
|
||||
```
|
||||
action: "get_filings"
|
||||
params: { ticker: "[TICKER]", filing_type: "10-K", limit: 1 }
|
||||
```
|
||||
If 10-K is not recent enough, also pull 10-Q:
|
||||
```
|
||||
action: "get_filings"
|
||||
params: { ticker: "[TICKER]", filing_type: "10-Q", limit: 1 }
|
||||
```
|
||||
|
||||
2. **Read MD&A section** (Management's Discussion and Analysis):
|
||||
```
|
||||
action: "get_filing_items"
|
||||
params: { ticker: "[TICKER]", filing_type: "10-K", item: "7" }
|
||||
```
|
||||
For 10-Q, MD&A is item "2":
|
||||
```
|
||||
action: "get_filing_items"
|
||||
params: { ticker: "[TICKER]", filing_type: "10-Q", item: "2" }
|
||||
```
|
||||
|
||||
3. **Read Risk Factors**:
|
||||
```
|
||||
action: "get_filing_items"
|
||||
params: { ticker: "[TICKER]", filing_type: "10-K", item: "1A" }
|
||||
```
|
||||
|
||||
4. **Extract and analyze**:
|
||||
- Management's explanation of revenue and margin trends
|
||||
- Forward-looking statements and guidance
|
||||
- Key risk factors that could impact financial health
|
||||
- Any disclosures about accounting policy changes
|
||||
- Cross-validate: Does management narrative align with the quantitative data from Steps 2-4?
|
||||
- Flag contradictions between management tone and actual numbers
|
||||
|
||||
5. **Supplement with earnings call transcript** (REQUIRED — web search/fetch):
|
||||
You MUST search for and incorporate the most recent earnings call. This is critical for understanding management's forward-looking view.
|
||||
- Search for the transcript:
|
||||
`web_search("[COMPANY] [QUARTER] [YEAR] earnings call transcript")`
|
||||
- If a transcript URL is found, use `web_fetch` to read key sections (CEO/CFO prepared remarks, Q&A highlights).
|
||||
- Extract: forward guidance, segment-level commentary, management tone on competitive position, key analyst concerns.
|
||||
- Cross-reference earnings call statements with MD&A disclosures — flag any inconsistencies.
|
||||
|
||||
6. **Summarize key insights**:
|
||||
- What management says about the business trajectory
|
||||
- Material risks not visible in the numbers alone
|
||||
- Any changes in risk factors vs. prior filings (if noticeable)
|
||||
- Key analyst questions and management responses from earnings call (if available)
|
||||
|
||||
### Step 8: Peer Comparison (Conditional)
|
||||
|
||||
**Execute this step only when the user explicitly requests peer comparison or industry benchmarking.**
|
||||
|
||||
1. **Identify peers**:
|
||||
- Use the `sector` and `industry` from `get_company_facts`
|
||||
- Select 2-3 publicly traded competitors in the same industry
|
||||
- If the user specifies peers, use those instead
|
||||
|
||||
2. **Pull peer data** (for each peer):
|
||||
```
|
||||
action: "get_financial_metrics_snapshot"
|
||||
params: { ticker: "[PEER_TICKER]" }
|
||||
```
|
||||
```
|
||||
action: "get_income_statements"
|
||||
params: { ticker: "[PEER_TICKER]", period: "annual", limit: 1 }
|
||||
```
|
||||
```
|
||||
action: "get_balance_sheets"
|
||||
params: { ticker: "[PEER_TICKER]", period: "annual", limit: 1 }
|
||||
```
|
||||
|
||||
3. **Comparative table**:
|
||||
|
||||
| Metric | [TARGET] | [PEER 1] | [PEER 2] | [PEER 3] | Sector Avg |
|
||||
|--------|----------|----------|----------|----------|------------|
|
||||
| Revenue Growth (YoY) | | | | | |
|
||||
| Gross Margin | | | | | |
|
||||
| Net Margin | | | | | |
|
||||
| ROE | | | | | |
|
||||
| D/E Ratio | | | | | |
|
||||
| FCF Margin | | | | | |
|
||||
| P/E Ratio | | | | | |
|
||||
|
||||
4. **Competitive position assessment**:
|
||||
- Where does the target company rank among peers on each metric?
|
||||
- Identify clear advantages and disadvantages relative to peers
|
||||
- Note if the target trades at a premium or discount to peers and whether it's justified
|
||||
|
||||
### Step 9: Present Findings
|
||||
|
||||
Compile the full analysis into a structured report. Follow this exact structure:
|
||||
|
||||
#### 1. Executive Summary
|
||||
- Company name, ticker, sector, current price
|
||||
- One-paragraph thesis: Is this a financially healthy company with a durable competitive advantage?
|
||||
- Financial health rating from Buffett scorecard (Excellent / Good / Average / Weak)
|
||||
- Earnings quality assessment (High / Moderate / Low)
|
||||
|
||||
#### 2. Financial Health Scorecard
|
||||
- Full Buffett checklist scorecard table from Step 5
|
||||
- Total score and rating
|
||||
|
||||
#### 3. Trend Dashboard
|
||||
- 5-year key metrics trend table from Steps 2-4:
|
||||
|
||||
| Metric | Y1 | Y2 | Y3 | Y4 | Y5 | Trend |
|
||||
|--------|----|----|----|----|----|----|
|
||||
| Revenue | | | | | | arrow |
|
||||
| Gross Margin | | | | | | arrow |
|
||||
| Net Margin | | | | | | arrow |
|
||||
| ROE | | | | | | arrow |
|
||||
| D/E Ratio | | | | | | arrow |
|
||||
| FCF | | | | | | arrow |
|
||||
| OCF/NI | | | | | | arrow |
|
||||
| CapEx/NI | | | | | | arrow |
|
||||
|
||||
Use directional indicators (e.g., ↑, ↓, →) in place of the `arrow` placeholder in the Trend column.
|
||||
|
||||
#### 4. Quality of Earnings
|
||||
- Summary from Step 6 with key metrics and assessment
|
||||
|
||||
#### 5. Key Strengths & Red Flags
|
||||
- **Strengths**: List 3-5 financial strengths with supporting data
|
||||
- **Red Flags**: List any warning signs discovered during analysis. If none, state "No material red flags identified."
|
||||
|
||||
Common red flags to watch for:
|
||||
- Revenue growth but declining margins
|
||||
- Net income growing but OCF declining
|
||||
- AR growing faster than revenue
|
||||
- Inventory building up vs. flat COGS
|
||||
- Rising debt with declining interest coverage
|
||||
- Retained earnings declining
|
||||
- Large goodwill relative to total assets
|
||||
- CapEx consistently > 50% of net income
|
||||
- Management tone in MD&A contradicts financial data
|
||||
|
||||
#### 6. SEC Filing Insights
|
||||
- Key findings from Step 7
|
||||
- Management's outlook and material risks
|
||||
|
||||
#### 7. Peer Comparison (if Step 8 was executed)
|
||||
- Comparative table and competitive position assessment
|
||||
|
||||
### Guardrails
|
||||
|
||||
- Always state the date range of financial data used.
|
||||
- If any data is missing or unavailable, explicitly note it and adjust the analysis scope.
|
||||
- Do not present calculated ratios with false precision — round to one decimal place.
|
||||
- Clearly distinguish between facts (from data) and interpretive conclusions.
|
||||
- The Buffett scorecard is a screening framework, not a buy/sell recommendation. State this in the output.
|
||||
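- For non-US companies or companies not filing with the SEC, skip the SEC filing retrieval and review in Step 7 (items 1-4) and note the limitation; the earnings call transcript search (Step 7, item 5) still applies.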
|
||||
- Output language must match the user's input language (Chinese input → Chinese output, English input → English output).
|
||||
|
||||
### Web Search Requirements
|
||||
|
||||
**Minimum mandatory searches (you MUST perform these):**
|
||||
1. Earnings call highlights (Step 1b) — for management's own explanation of results
|
||||
2. Industry outlook (Step 1b) — for macro/sector context
|
||||
3. Earnings call transcript (Step 7) — for forward guidance and analyst Q&A
|
||||
|
||||
**Additional searches (trigger when data shows anomalies):**
|
||||
- Revenue or margin inflection points (Step 2)
|
||||
- Major debt changes or acquisitions (Step 3)
|
||||
- CapEx spikes (Step 4)
|
||||
- Quality-of-earnings red flags (Step 6)
|
||||
|
||||
**Search principles:**
|
||||
- **Source quality**: Prefer primary sources (SEC filings, company press releases, earnings call transcripts) over secondary sources (analyst blogs, news aggregators).
|
||||
- **Cite with dates**: Always include source name and date when referencing external information.
|
||||
- **Separate fact from opinion**: Label analyst or media commentary as external opinion, not fact.
|
||||
- **Total budget**: Expect 3-8 web searches per analysis. Fewer than 3 means you are likely missing critical context.
|
||||
99
skills/earnings-analysis/references/buffett-checklist.md
Normal file
99
skills/earnings-analysis/references/buffett-checklist.md
Normal file
|
|
@ -0,0 +1,99 @@
|
|||
# Buffett Competitive Advantage Checklist
|
||||
|
||||
Score each criterion and calculate a total. Use this to assess whether a company has a durable competitive advantage (economic moat).
|
||||
|
||||
## Scoring System
|
||||
|
||||
Total: 100 points across 4 categories (25 points each).
|
||||
|
||||
### Category 1: Profitability (25 points)
|
||||
|
||||
| # | Criterion | Excellent | Good | Weak |
|
||||
|---|-----------|-----------|------|------|
|
||||
| 1 | **Gross Margin** | > 40% → **10 pts** | 30-40% → **6 pts** | < 30% → **2 pts** |
|
||||
| 2 | **Net Margin** | > 20% → **10 pts** | 10-20% → **6 pts** | < 10% → **2 pts** |
|
||||
| 3 | **Return on Equity (ROE)** | > 15% → **5 pts** | 10-15% → **3 pts** | < 10% → **1 pt** |
|
||||
|
||||
How to calculate:
|
||||
- Gross Margin = Gross Profit / Revenue
|
||||
- Net Margin = Net Income / Revenue
|
||||
- ROE = Net Income / Total Shareholders' Equity
|
||||
- Use the most recent annual figures; cross-check with 5-year average
|
||||
|
||||
### Category 2: Balance Sheet Health (25 points)
|
||||
|
||||
| # | Criterion | Pass | Partial | Fail |
|
||||
|---|-----------|------|---------|------|
|
||||
| 4 | **Cash > Total Debt** | Yes → **8 pts** | Cash > 50% of Debt → **4 pts** | Cash < 50% of Debt → **1 pt** |
|
||||
| 5 | **Debt-to-Equity Ratio** | < 0.8 → **7 pts** | 0.8-1.5 → **4 pts** | > 1.5 → **1 pt** |
|
||||
| 6 | **No Preferred Stock** | None → **5 pts** | — | Has Preferred → **0 pts** |
|
||||
| 7 | **Retained Earnings Growth** | Growing 5 consecutive years → **5 pts** | Growing 3-4 years → **3 pts** | Declining or flat → **1 pt** |
|
||||
|
||||
How to calculate:
|
||||
- Cash = Cash and Cash Equivalents + Short-term Investments
|
||||
- Total Debt = Short-term Debt + Long-term Debt
|
||||
- D/E = Total Liabilities / Total Shareholders' Equity
|
||||
- Retained Earnings: Compare year-over-year from balance sheets
|
||||
|
||||
Special note on D/E:
|
||||
- Exclude operating lease liabilities from "debt" for this assessment (they are contractual obligations, not financial debt)
|
||||
- If treasury stock is large, it reduces equity and inflates D/E — note this in analysis
|
||||
|
||||
### Category 3: Cash Flow Quality (25 points)
|
||||
|
||||
| # | Criterion | Excellent | Good | Weak |
|
||||
|---|-----------|-----------|------|------|
|
||||
| 8 | **CapEx / Net Income** | < 25% → **10 pts** | 25-50% → **6 pts** | > 50% → **2 pts** |
|
||||
| 9 | **Operating CF > Net Income** | OCF/NI > 1.0 → **8 pts** | OCF/NI = 0.8-1.0 → **4 pts** | OCF/NI < 0.8 → **1 pt** |
|
||||
| 10 | **Shareholder Returns** | Buybacks + Dividends → **7 pts** | Dividends only → **4 pts** | Neither → **1 pt** |
|
||||
|
||||
How to calculate:
|
||||
- CapEx: Capital Expenditure from cash flow statement (use absolute value)
|
||||
- Operating CF: Net Cash from Operating Activities
|
||||
- Buybacks: Check if Treasury Stock increased year-over-year, or look at "repurchase of common stock" in financing activities
|
||||
- Dividends: Look at "dividends paid" in financing activities
|
||||
|
||||
Note on CapEx:
|
||||
- One-time large CapEx (e.g., new factory, data center buildout) should be noted but not penalized if the 5-year average CapEx/NI is still within range
|
||||
- Asset-light businesses (software, services) naturally score well here
|
||||
|
||||
### Category 4: Consistency (25 points)
|
||||
|
||||
| # | Criterion | Excellent | Good | Weak |
|
||||
|---|-----------|-----------|------|------|
|
||||
| 11 | **Revenue Growth Streak** | 5+ consecutive years growing → **10 pts** | 3-4 years → **6 pts** | < 3 years → **2 pts** |
|
||||
| 12 | **Net Income Growth Streak** | 5+ consecutive years growing → **10 pts** | 3-4 years → **6 pts** | < 3 years → **2 pts** |
|
||||
| 13 | **Recession Resilience** | Profitable through last recession → **5 pts** | Revenue dip < 10% → **3 pts** | Significant losses → **1 pt** |
|
||||
|
||||
How to assess:
|
||||
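- Revenue/NI growth: Check year-over-year changes for the last 5 years (note: 5 annual statements yield only 4 year-over-year comparisons; state how many growth years the available data actually confirms)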
|
||||
- Recession resilience: Check 2020 (COVID) and 2022 (rate hikes) performance. For older data, check 2008-2009 if available.
|
||||
- A single flat year in an otherwise consistent growth streak can be scored as "Good"
|
||||
|
||||
## Score Interpretation
|
||||
|
||||
| Total Score | Rating | Interpretation |
|
||||
|-------------|--------|----------------|
|
||||
| 80-100 | **Excellent** | Strong durable competitive advantage. Consistent profitability, fortress balance sheet, capital-light operations. Classic Buffett-style investment candidate. |
|
||||
| 60-79 | **Good** | Solid business with some competitive advantages. May have minor weaknesses in one category. Worth deeper investigation. |
|
||||
| 40-59 | **Average** | Mediocre competitive position. Multiple areas of concern. Higher risk of margin erosion or competitive disruption. |
|
||||
| < 40 | **Weak** | No clear competitive advantage. High debt, inconsistent earnings, or capital-intensive operations. Not a typical Buffett investment. |
|
||||
|
||||
## Sector-Specific Caveats
|
||||
|
||||
- **Financials**: Skip gross margin (criterion 1). Use net interest margin > 3% as substitute for 10 pts. D/E ratio thresholds don't apply — use Tier 1 Capital Ratio > 10% for 7 pts instead.
|
||||
- **Utilities**: Naturally capital-intensive (CapEx criterion will score low). Offset by checking regulated return stability. If regulated ROE is consistently 9-11%, award 6 pts for criterion 8.
|
||||
- **REITs**: Required to pay out 90%+ as dividends, so retained earnings won't grow. Skip criterion 7; award 5 pts if FFO per share grows consistently instead.
|
||||
- **Growth-stage Tech**: May not yet have 5 years of profitability. Score consistency based on revenue growth and gross margin expansion trajectory. Note that the overall score may be artificially low.
|
||||
|
||||
## Output Format
|
||||
|
||||
Present the scorecard as a table:
|
||||
|
||||
| # | Criterion | Value | Score | Max |
|
||||
|---|-----------|-------|-------|-----|
|
||||
| 1 | Gross Margin | 43.2% | 10 | 10 |
|
||||
| 2 | Net Margin | 25.1% | 10 | 10 |
|
||||
| ... | ... | ... | ... | ... |
|
||||
| | **Total** | | **XX** | **100** |
|
||||
| | **Rating** | | **Excellent/Good/Average/Weak** | |
|
||||
|
|
@ -0,0 +1,70 @@
|
|||
# Financial Ratios Benchmarks by Sector
|
||||
|
||||
Use the company's `sector` from `get_company_facts` to look up benchmark ranges below. Compare the company's ratios against these benchmarks and note deviations.
|
||||
|
||||
## Profitability Benchmarks
|
||||
|
||||
| Sector | Gross Margin | Operating Margin | Net Margin | ROE | ROA |
|
||||
|--------|-------------|-----------------|------------|-----|-----|
|
||||
| Communication Services | 50-60% | 15-25% | 10-18% | 12-20% | 5-10% |
|
||||
| Consumer Discretionary | 35-50% | 8-15% | 5-10% | 15-25% | 5-10% |
|
||||
| Consumer Staples | 35-45% | 12-18% | 8-12% | 20-30% | 8-12% |
|
||||
| Energy | 30-50% | 10-20% | 5-15% | 10-20% | 5-10% |
|
||||
| Financials | N/A | 25-35% | 15-25% | 10-15% | 1-2% |
|
||||
| Health Care | 55-70% | 15-25% | 10-20% | 15-25% | 8-12% |
|
||||
| Industrials | 25-35% | 10-15% | 6-10% | 15-20% | 5-8% |
|
||||
| Information Technology | 55-70% | 20-30% | 15-25% | 20-35% | 10-15% |
|
||||
| Materials | 25-35% | 10-18% | 5-12% | 10-18% | 5-8% |
|
||||
| Real Estate | 55-70% | 25-40% | 15-30% | 5-10% | 2-5% |
|
||||
| Utilities | 35-50% | 15-25% | 8-15% | 8-12% | 3-5% |
|
||||
|
||||
## Balance Sheet Benchmarks
|
||||
|
||||
| Sector | Current Ratio | Quick Ratio | D/E Ratio | Interest Coverage |
|
||||
|--------|--------------|-------------|-----------|-------------------|
|
||||
| Communication Services | 1.0-1.5 | 0.8-1.2 | 0.8-1.5 | 4-8x |
|
||||
| Consumer Discretionary | 1.2-2.0 | 0.8-1.5 | 0.5-1.2 | 5-10x |
|
||||
| Consumer Staples | 1.0-1.5 | 0.6-1.0 | 0.5-1.0 | 8-15x |
|
||||
| Energy | 1.0-1.5 | 0.8-1.2 | 0.3-0.8 | 5-10x |
|
||||
| Financials | N/A | N/A | 2.0-8.0 | N/A |
|
||||
| Health Care | 1.5-2.5 | 1.2-2.0 | 0.3-0.8 | 8-15x |
|
||||
| Industrials | 1.2-2.0 | 0.8-1.5 | 0.5-1.0 | 6-12x |
|
||||
| Information Technology | 2.0-3.5 | 1.5-3.0 | 0.2-0.6 | 15-30x |
|
||||
| Materials | 1.5-2.5 | 1.0-1.5 | 0.4-0.8 | 6-12x |
|
||||
| Real Estate | 1.0-1.5 | 0.5-1.0 | 0.8-1.5 | 3-5x |
|
||||
| Utilities | 0.8-1.2 | 0.5-0.8 | 1.0-2.0 | 3-5x |
|
||||
|
||||
## Cash Flow Benchmarks
|
||||
|
||||
| Sector | FCF Margin | CapEx/Revenue | Op. CF / Net Income |
|
||||
|--------|-----------|---------------|---------------------|
|
||||
| Communication Services | 10-20% | 10-20% | 1.2-1.8x |
|
||||
| Consumer Discretionary | 5-12% | 3-8% | 1.1-1.5x |
|
||||
| Consumer Staples | 8-15% | 3-6% | 1.2-1.5x |
|
||||
| Energy | 5-15% | 15-30% | 1.5-2.5x |
|
||||
| Financials | N/A | 1-3% | N/A |
|
||||
| Health Care | 15-25% | 3-8% | 1.2-1.8x |
|
||||
| Industrials | 5-12% | 3-8% | 1.2-1.6x |
|
||||
| Information Technology | 20-35% | 3-10% | 1.2-1.8x |
|
||||
| Materials | 5-12% | 5-12% | 1.3-2.0x |
|
||||
| Real Estate | 15-30% | 5-15% | 1.5-3.0x |
|
||||
| Utilities | 5-10% | 15-25% | 2.0-3.5x |
|
||||
|
||||
## Usage Notes
|
||||
|
||||
- **Financials sector**: Gross margin and current/quick ratios are not meaningful for banks and insurers. Use net interest margin and capital adequacy ratios instead.
|
||||
- **Real Estate**: High depreciation makes net margin less useful. Focus on Funds From Operations (FFO).
|
||||
- **Growth-stage companies**: May have negative margins. Compare against growth-stage peers rather than mature sector benchmarks.
|
||||
- **Cyclical sectors** (Energy, Materials, Industrials): Use cycle-average margins (5-7 years) rather than single-year comparisons.
|
||||
- **Post-M&A**: Goodwill and amortization may distort margins for 1-2 years after acquisitions. Note any large acquisitions.
|
||||
|
||||
## Buffett's Rules of Thumb (Quick Reference)
|
||||
|
||||
| Metric | Excellent | Good | Weak |
|
||||
|--------|-----------|------|------|
|
||||
| Gross Margin | > 40% | 30-40% | < 30% |
|
||||
| Net Margin | > 20% | 10-20% | < 10% |
|
||||
| ROE | > 15% | 10-15% | < 10% |
|
||||
| D/E Ratio | < 0.5 | 0.5-0.8 | > 0.8 |
|
||||
| CapEx / Net Income | < 25% | 25-50% | > 50% |
|
||||
| Debt Payoff (years) | < 2 | 2-4 | > 4 |
|
||||
Loading…
Add table
Add a link
Reference in a new issue