From d3ef8ecc31262046da7b6199391d6ce00895e322 Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Sun, 15 Feb 2026 22:08:48 +0800 Subject: [PATCH 1/4] feat(agent): replace sessions_spawn with synchronous delegate tool Replace the async sessions_spawn/sessions_list sub-agent system with a single synchronous `delegate` tool. The new tool runs tasks in parallel via Promise.all with per-task timeout, returning combined results directly in the tool response. This eliminates the need for registry, announce queue, persistence, and Hub involvement. Co-Authored-By: Claude Opus 4.6 --- packages/core/src/agent/index.ts | 3 + packages/core/src/agent/run-log.ts | 10 + packages/core/src/agent/runner.ts | 11 +- .../core/src/agent/system-prompt/sections.ts | 66 ++-- packages/core/src/agent/tools.ts | 42 ++- packages/core/src/agent/tools/delegate.ts | 318 ++++++++++++++++++ packages/core/src/agent/tools/groups.ts | 6 +- packages/core/src/agent/tools/index.ts | 2 +- 8 files changed, 392 insertions(+), 66 deletions(-) create mode 100644 packages/core/src/agent/tools/delegate.ts diff --git a/packages/core/src/agent/index.ts b/packages/core/src/agent/index.ts index 6dccd6f7..01488856 100644 --- a/packages/core/src/agent/index.ts +++ b/packages/core/src/agent/index.ts @@ -13,6 +13,8 @@ export * from "./tools.js"; export * from "./tools/policy.js"; export * from "./tools/groups.js"; export * from "./extract-text.js"; +// @deprecated — Old subagent registry. Use `delegate` tool instead. +// Kept temporarily for desktop app compatibility. export { listSubagentRuns, getSubagentRun, @@ -23,6 +25,7 @@ export type { SubagentRunOutcome, SubagentGroup, } from "./subagent/types.js"; + export { readClaudeCliCredentials, readCodexCliCredentials, diff --git a/packages/core/src/agent/run-log.ts b/packages/core/src/agent/run-log.ts index 18bf5c80..29b52585 100644 --- a/packages/core/src/agent/run-log.ts +++ b/packages/core/src/agent/run-log.ts @@ -45,6 +45,16 @@ * - `compaction_detail` — Detailed compaction breakdown. * Fields: pre_pruning_tokens, post_compaction_tokens, messages_removed, reason, pruning_applied * + * ### Delegation (Sub-Agents) + * - `delegate_start` — Delegate tool invoked. + * Fields: task_count, timeout_seconds, labels[] + * - `delegate_task_start` — Individual sub-agent task begins. + * Fields: index, label, task (first 200 chars) + * - `delegate_task_end` — Individual sub-agent task completes. + * Fields: index, label, status (ok|error|timeout), duration_ms, findings_chars, error? + * - `delegate_end` — All delegated tasks complete. + * Fields: task_count, ok, errors, timeouts, total_duration_ms + * * ### Error Recovery * - `context_overflow` — Context window overflow detected. * Fields: attempt, messages_before diff --git a/packages/core/src/agent/runner.ts b/packages/core/src/agent/runner.ts index e5f6b2b8..c812b4a1 100644 --- a/packages/core/src/agent/runner.ts +++ b/packages/core/src/agent/runner.ts @@ -393,11 +393,10 @@ export class Agent { const mergedToolsConfig = mergeToolsConfig(profileToolsConfig, options.tools); const profileDir = this.profile?.getProfileDir(); // Use this.sessionId (which may be auto-generated) instead of options.sessionId - // (which may be undefined). Without this, sessions_list and sessions_spawn - // can't find sub-agent runs because they have no session context. + // (which may be undefined). Without this, delegate tool has no session context. this.toolsOptions = mergedToolsConfig - ? { ...options, sessionId: this.sessionId, cwd: effectiveCwd, tools: mergedToolsConfig, profileDir, provider: this.resolvedProvider } - : { ...options, sessionId: this.sessionId, cwd: effectiveCwd, profileDir, provider: this.resolvedProvider }; + ? { ...options, sessionId: this.sessionId, cwd: effectiveCwd, tools: mergedToolsConfig, profileDir, provider: this.resolvedProvider, runLog: this.runLog } + : { ...options, sessionId: this.sessionId, cwd: effectiveCwd, profileDir, provider: this.resolvedProvider, runLog: this.runLog }; const tools = resolveTools(this.toolsOptions); if (this.debug) { @@ -1242,10 +1241,10 @@ export class Agent { // Update internal state this.resolvedProvider = providerId; - // Keep toolsOptions.provider in sync so sessions_spawn inherits the current provider + // Keep toolsOptions.provider in sync so delegate tool inherits the current provider this.toolsOptions = { ...this.toolsOptions, provider: providerId }; - // Reload tools so sessions_spawn picks up the new provider in its closure. + // Reload tools so delegate picks up the new provider in its closure. // Without this, the existing tool instance still captures the old provider. const tools = resolveTools(this.toolsOptions); this.agent.setTools(tools); diff --git a/packages/core/src/agent/system-prompt/sections.ts b/packages/core/src/agent/system-prompt/sections.ts index fae47535..bf935fce 100644 --- a/packages/core/src/agent/system-prompt/sections.ts +++ b/packages/core/src/agent/system-prompt/sections.ts @@ -28,7 +28,7 @@ const CORE_TOOL_SUMMARIES: Record = { web_search: "Search the web via Devv Search", web_fetch: "Fetch and extract readable content from a URL", memory_search: "Search memory files by keyword", - sessions_spawn: "Spawn a sub-agent session", + delegate: "Run tasks in parallel via sub-agents", data: "Query structured financial and market data", }; @@ -43,7 +43,7 @@ const TOOL_ORDER = [ "web_search", "web_fetch", "memory_search", - "sessions_spawn", + "delegate", "data", ]; @@ -313,52 +313,34 @@ export function buildConditionalToolSections( ); } - // Subagent tools (full mode only — minimal agents cannot spawn) - if (mode === "full" && toolSet.has("sessions_spawn")) { + // Delegate tool (full mode only — sub-agents cannot delegate) + if (mode === "full" && toolSet.has("delegate")) { lines.push( - "## Sub-Agents", - "If a task is complex or long-running, spawn a sub-agent. It will do the work and report back when done.", + "## Delegation (Sub-Agents)", + "Use `delegate` to run tasks in parallel via isolated sub-agents. " + + "Each task gets its own agent with full tool access. Results are returned directly when all tasks complete.", "", - "### Critical Rules", - "- **NEVER fabricate, guess, or make up data that a sub-agent has not yet returned.** " + - "This includes completion status — do NOT claim tasks are done until you receive actual results.", - "- After spawning, do NOT proceed with work that depends on the sub-agent results. " + - "You can still chat with the user, do unrelated tasks, or explain what the sub-agents are working on.", - "- Sub-agents cannot spawn nested sub-agents.", - "- You can use `sessions_list` to check sub-agent status if needed.", - "", - "### Groups and Continuation (`next`) — ALWAYS use for multi-agent tasks", - "When spawning multiple sub-agents, **always** use `next` to define the follow-up work. " + - "This is the standard pattern — do NOT use bare `announce: \"silent\"` for multi-agent collect-then-act workflows.", + "### When to Use", + "- Collecting data from multiple independent sources (e.g. research 3 stocks simultaneously)", + "- Comparative analysis that can be parallelized (e.g. analyze 5 error logs in parallel)", + "- Any task where independent sub-tasks would benefit from parallel execution", "", + "### Example", "```", - "// First spawn — creates a group automatically, returns groupId", - 'sessions_spawn({ task: "Get AAPL financials", next: "Summarize all data and write a PDF report", label: "AAPL" })', - "// → { groupId: \"grp-abc\", runId: \"...\" }", - "", - "// Subsequent spawns — join the same group", - 'sessions_spawn({ task: "Get MSFT financials", groupId: "grp-abc", label: "MSFT" })', - 'sessions_spawn({ task: "Get GOOG financials", groupId: "grp-abc", label: "GOOG" })', + "delegate({", + ' tasks: [', + ' { task: "Research AAPL financials: revenue, net income, stock price", label: "AAPL" },', + ' { task: "Research MSFT financials: revenue, net income, stock price", label: "MSFT" },', + ' { task: "Research GOOG financials: revenue, net income, stock price", label: "GOOG" }', + " ]", + "})", "```", "", - "The system waits for ALL runs in the group to complete, then delivers the combined findings " + - "plus the `next` continuation prompt back to you. You can then use tools (write files, call APIs, etc.) " + - "to complete the follow-up work. The user is NOT blocked during this process — they can keep chatting.", - "", - "Use `next` whenever the user's request involves: collect data → then act on it (summarize, analyze, generate files).", - "Without `next`, findings are summarized but no further action is taken.", - "", - "### Announce Modes (when not using groups)", - "- `announce: \"immediate\"` (default): findings delivered per sub-agent as each completes.", - "- `announce: \"silent\"`: all findings held until every silent sub-agent finishes, then delivered together.", - "Groups always use silent collection internally — you don't need to set announce when using groupId.", - "", - "### Timeout Guidelines", - "Set timeoutSeconds generously — a sub-agent that times out loses all its work.", - "- Simple tasks (search, read, summarize): 1800 (30 min, the default)", - "- Moderate tasks (multi-step research, file downloads + analysis): 1800–2400 (30–40 min)", - "- Complex tasks (code generation, PDF creation, multi-file operations): 2400–3600 (40–60 min)", - "When in doubt, use a longer timeout.", + "### Rules", + "- Each sub-agent task should be self-contained — include all context needed in the task description.", + "- Sub-agents cannot delegate further (no nesting).", + "- The tool blocks until all tasks complete — plan your workflow accordingly.", + "- Set `timeoutSeconds` generously for complex tasks (default: 1800 = 30 min).", "", ); } diff --git a/packages/core/src/agent/tools.ts b/packages/core/src/agent/tools.ts index fd9bf10a..83a61876 100644 --- a/packages/core/src/agent/tools.ts +++ b/packages/core/src/agent/tools.ts @@ -6,8 +6,7 @@ import { createExecTool } from "./tools/exec.js"; import { createProcessTool } from "./tools/process.js"; import { createGlobTool } from "./tools/glob.js"; import { createWebFetchTool, createWebSearchTool } from "./tools/web/index.js"; -import { createSessionsSpawnTool } from "./tools/sessions-spawn.js"; -import { createSessionsListTool } from "./tools/sessions-list.js"; +import { createDelegateTool } from "./tools/delegate.js"; import { createMemorySearchTool } from "./tools/memory-search.js"; import { createCronTool } from "./tools/cron/index.js"; import { createDataTool } from "./tools/data/index.js"; @@ -26,12 +25,20 @@ export interface CreateToolsOptions { cwd: string; /** Profile directory for memory_search tool (optional) */ profileDir?: string | undefined; - /** Whether this agent is a subagent (passed to sessions_spawn tool) */ + /** Whether this agent is a subagent (passed to delegate tool) */ isSubagent?: boolean | undefined; - /** Session ID of the agent (passed to sessions_spawn tool) */ + /** Session ID of the agent (passed to delegate tool) */ sessionId?: string | undefined; - /** Resolved provider ID of the parent agent (passed to sessions_spawn for subagent inheritance) */ + /** Resolved provider ID of the parent agent (inherited by sub-agents) */ provider?: string | undefined; + /** Model name (inherited by sub-agents) */ + model?: string | undefined; + /** API key (inherited by sub-agents) */ + apiKey?: string | undefined; + /** Whether run-log is enabled (passed to child agents) */ + enableRunLog?: boolean | undefined; + /** Run-log instance for delegate events */ + runLog?: import("./run-log.js").RunLog | undefined; /** Callback invoked when exec tool needs approval before running a command */ onExecApprovalNeeded?: ExecApprovalCallback | undefined; /** Callback for sending files through messaging channels */ @@ -143,17 +150,18 @@ export function createAllTools(options: CreateToolsOptions | string): AgentTool< tools.push(sendFileTool as AgentTool); } - // Add sessions_spawn tool (will be filtered by policy for subagents) - const sessionsSpawnTool = createSessionsSpawnTool({ + // Add delegate tool (will be filtered by policy for subagents) + const delegateTool = createDelegateTool({ isSubagent: isSubagent ?? false, - ...(sessionId !== undefined ? { sessionId } : {}), - ...(opts.provider !== undefined ? { provider: opts.provider } : {}), + sessionId, + provider: opts.provider, + model: opts.model, + apiKey: opts.apiKey, + cwd, + runLog: opts.runLog, + enableRunLog: opts.enableRunLog, }); - tools.push(sessionsSpawnTool as AgentTool); - - // Add sessions_list tool - const sessionsListTool = createSessionsListTool({ ...(sessionId !== undefined ? { sessionId } : {}) }); - tools.push(sessionsListTool as AgentTool); + tools.push(delegateTool as AgentTool); return tools; } @@ -162,6 +170,8 @@ export function createAllTools(options: CreateToolsOptions | string): AgentTool< export interface ResolveToolsOptions extends AgentOptions { /** Profile directory for memory_search tool (computed from profileId if not provided) */ profileDir?: string | undefined; + /** Run-log instance (forwarded to delegate tool) */ + runLog?: import("./run-log.js").RunLog | undefined; } /** @@ -183,6 +193,10 @@ export function resolveTools(options: ResolveToolsOptions): AgentTool[] { isSubagent: options.isSubagent, sessionId: options.sessionId, provider: options.provider, + model: options.model, + apiKey: options.apiKey, + enableRunLog: options.enableRunLog, + runLog: options.runLog, onExecApprovalNeeded: options.onExecApprovalNeeded, onChannelSendFile: options.onChannelSendFile, }); diff --git a/packages/core/src/agent/tools/delegate.ts b/packages/core/src/agent/tools/delegate.ts new file mode 100644 index 00000000..9c414006 --- /dev/null +++ b/packages/core/src/agent/tools/delegate.ts @@ -0,0 +1,318 @@ +/** + * delegate tool — run tasks in parallel via sub-agents. + * + * Synchronous from the LLM's perspective: the tool blocks until all + * sub-agent tasks complete (or timeout), then returns combined results + * directly in the tool response. No async infrastructure needed. + */ + +import { join } from "node:path"; +import { rmSync } from "node:fs"; +import { Writable } from "node:stream"; +import { Type } from "@sinclair/typebox"; +import type { AgentTool } from "@mariozechner/pi-agent-core"; +import { Agent } from "../runner.js"; +import type { RunLog } from "../run-log.js"; +import { DATA_DIR } from "@multica/utils"; + +const TaskItemSchema = Type.Object({ + task: Type.String({ description: "The task for the sub-agent to perform.", minLength: 1 }), + label: Type.Optional( + Type.String({ description: "Short human-readable label for this task (used in output headers)." }), + ), +}); + +const DelegateSchema = Type.Object({ + tasks: Type.Array(TaskItemSchema, { + description: "One or more tasks to run in parallel. Each spawns an isolated sub-agent.", + minItems: 1, + }), + timeoutSeconds: Type.Optional( + Type.Number({ + description: + "Per-task timeout in seconds. Default: 1800 (30 min). " + + "Set higher for complex tasks. The sub-agent is aborted if it exceeds this limit.", + minimum: 0, + }), + ), +}); + +type DelegateArgs = { + tasks: Array<{ task: string; label?: string }>; + timeoutSeconds?: number; +}; + +type TaskResult = { + index: number; + label: string; + status: "ok" | "error" | "timeout"; + durationMs: number; + findings: string; + error?: string; +}; + +export type DelegateResult = { + taskCount: number; + ok: number; + errors: number; + timeouts: number; + totalDurationMs: number; + tasks: TaskResult[]; +}; + +export interface CreateDelegateToolOptions { + /** Whether the current agent is itself a subagent */ + isSubagent?: boolean; + /** Session ID of the parent agent */ + sessionId?: string; + /** Resolved provider ID (inherited by sub-agents) */ + provider?: string; + /** Model override (inherited by sub-agents) */ + model?: string; + /** API key (inherited by sub-agents) */ + apiKey?: string; + /** Working directory (inherited by sub-agents) */ + cwd?: string; + /** Run-log instance for emitting delegate events */ + runLog?: RunLog; + /** Whether run-log is enabled (passed to child agents) */ + enableRunLog?: boolean; +} + +const DEFAULT_TIMEOUT_SECONDS = 1800; // 30 minutes + +function formatElapsed(ms: number): string { + const totalSeconds = Math.round(ms / 1000); + if (totalSeconds < 60) return `${totalSeconds}s`; + const minutes = Math.floor(totalSeconds / 60); + const seconds = totalSeconds % 60; + if (minutes < 60) return seconds > 0 ? `${minutes}m${seconds}s` : `${minutes}m`; + const hours = Math.floor(minutes / 60); + const remainingMinutes = minutes % 60; + return remainingMinutes > 0 ? `${hours}h${remainingMinutes}m` : `${hours}h`; +} + +/** + * Run a single sub-agent task with timeout. + */ +async function runSubagentTask( + taskDef: { task: string; label?: string }, + index: number, + timeoutMs: number, + parentOptions: CreateDelegateToolOptions, + runLog?: RunLog, +): Promise { + const label = taskDef.label || `Task ${index + 1}`; + const start = Date.now(); + + runLog?.log("delegate_task_start", { + index, + label, + task: taskDef.task.slice(0, 200), + }); + + const childAgent = new Agent({ + provider: parentOptions.provider, + model: parentOptions.model, + apiKey: parentOptions.apiKey, + cwd: parentOptions.cwd, + isSubagent: true, + enableSkills: false, + compactionMode: "tokens", + enableRunLog: parentOptions.enableRunLog, + // Suppress stdout/stderr output from child agents + logger: { + stdout: new NullStream(), + stderr: new NullStream(), + }, + }); + + try { + let result: { text: string; error?: string }; + let timedOut = false; + + if (timeoutMs > 0) { + // Race agent.run against timeout + result = await Promise.race([ + childAgent.run(taskDef.task), + new Promise((_, reject) => { + setTimeout(() => { + timedOut = true; + childAgent.abort(); + reject(new Error("timeout")); + }, timeoutMs); + }), + ]).catch((err) => { + if (timedOut) { + return { text: "", error: `Timed out after ${formatElapsed(timeoutMs)}` }; + } + throw err; + }); + } else { + // No timeout + result = await childAgent.run(taskDef.task); + } + + const durationMs = Date.now() - start; + const status = timedOut ? "timeout" : result.error ? "error" : "ok"; + + const taskResult: TaskResult = { + index, + label, + status, + durationMs, + findings: result.text || "(no output)", + error: result.error || undefined, + }; + + runLog?.log("delegate_task_end", { + index, + label, + status, + duration_ms: durationMs, + findings_chars: taskResult.findings.length, + error: taskResult.error, + }); + + return taskResult; + } catch (err) { + const durationMs = Date.now() - start; + const message = err instanceof Error ? err.message : String(err); + + const taskResult: TaskResult = { + index, + label, + status: "error", + durationMs, + findings: "", + error: message, + }; + + runLog?.log("delegate_task_end", { + index, + label, + status: "error", + duration_ms: durationMs, + findings_chars: 0, + error: message, + }); + + return taskResult; + } finally { + // Flush session writes before cleanup + await childAgent.flushSession(); + + // Clean up child session directory unless run-log is enabled + if (!parentOptions.enableRunLog) { + try { + const sessionDir = join(DATA_DIR, "sessions", childAgent.sessionId); + rmSync(sessionDir, { recursive: true, force: true }); + } catch { + // Best-effort cleanup + } + } + } +} + +export function createDelegateTool( + options: CreateDelegateToolOptions, +): AgentTool { + return { + name: "delegate", + label: "Delegate Tasks", + description: + "Run one or more tasks in parallel via isolated sub-agents. " + + "Each task gets its own agent with full tool access. " + + "Results are returned directly when all tasks complete. " + + "Use this for parallelizable work: multi-stock research, comparative analysis, " + + "data collection from multiple sources, or any task that benefits from parallel execution.", + parameters: DelegateSchema, + execute: async (_toolCallId, args) => { + const { tasks, timeoutSeconds } = args as DelegateArgs; + const timeoutMs = (timeoutSeconds ?? DEFAULT_TIMEOUT_SECONDS) * 1000; + + // Guard: sub-agents cannot delegate + if (options.isSubagent) { + return { + content: [{ type: "text", text: "Error: delegate is not allowed from sub-agent sessions." }], + details: { + taskCount: 0, + ok: 0, + errors: 1, + timeouts: 0, + totalDurationMs: 0, + tasks: [], + }, + }; + } + + const labels = tasks.map((t, i) => t.label || `Task ${i + 1}`); + + options.runLog?.log("delegate_start", { + task_count: tasks.length, + timeout_seconds: timeoutSeconds ?? DEFAULT_TIMEOUT_SECONDS, + labels, + }); + + const totalStart = Date.now(); + + // Run all tasks in parallel + const results = await Promise.all( + tasks.map((taskDef, index) => + runSubagentTask(taskDef, index, timeoutMs, options, options.runLog), + ), + ); + + const totalDurationMs = Date.now() - totalStart; + const ok = results.filter((r) => r.status === "ok").length; + const errors = results.filter((r) => r.status === "error").length; + const timeouts = results.filter((r) => r.status === "timeout").length; + + options.runLog?.log("delegate_end", { + task_count: tasks.length, + ok, + errors, + timeouts, + total_duration_ms: totalDurationMs, + }); + + // Format combined response + const statusLine = + `All ${tasks.length} task(s) completed: ${ok} succeeded, ${errors} failed, ${timeouts} timed out.\n` + + `Total wall time: ${formatElapsed(totalDurationMs)}`; + + const taskSections = results.map((r) => { + const statusTag = r.status === "ok" ? "OK" : r.status.toUpperCase(); + const header = `--- Task ${r.index + 1}: "${r.label}" [${statusTag}] (${formatElapsed(r.durationMs)}) ---`; + const body = r.status === "error" && r.error + ? `Error: ${r.error}\n${r.findings || ""}` + : r.findings; + return `${header}\n${body}`; + }); + + const responseText = `${statusLine}\n\n${taskSections.join("\n\n")}`; + + return { + content: [{ type: "text", text: responseText }], + details: { + taskCount: tasks.length, + ok, + errors, + timeouts, + totalDurationMs, + tasks: results, + }, + }; + }, + }; +} + +/** + * Writable stream that discards all output. + * Used to suppress child agent stdout/stderr. + */ +class NullStream extends Writable { + _write(_chunk: any, _encoding: string, callback: () => void): void { + callback(); + } +} diff --git a/packages/core/src/agent/tools/groups.ts b/packages/core/src/agent/tools/groups.ts index a2bbed12..465d6875 100644 --- a/packages/core/src/agent/tools/groups.ts +++ b/packages/core/src/agent/tools/groups.ts @@ -34,7 +34,7 @@ export const TOOL_GROUPS: Record = { "group:memory": ["memory_search"], // Subagent tools - "group:subagent": ["sessions_spawn", "sessions_list"], + "group:subagent": ["delegate"], // Cron/scheduling tools "group:cron": ["cron"], @@ -61,8 +61,8 @@ export const TOOL_GROUPS: Record = { * Subagents should not have access to session management or system tools. */ export const DEFAULT_SUBAGENT_TOOL_DENY: string[] = [ - // Subagents cannot spawn subagents (no nested spawning) - "sessions_spawn", + // Subagents cannot delegate (no nested delegation) + "delegate", ]; /** diff --git a/packages/core/src/agent/tools/index.ts b/packages/core/src/agent/tools/index.ts index 3e148e1a..bb90da24 100644 --- a/packages/core/src/agent/tools/index.ts +++ b/packages/core/src/agent/tools/index.ts @@ -9,7 +9,7 @@ export { createGlobTool } from "./glob.js"; export { createWebFetchTool, createWebSearchTool } from "./web/index.js"; export { createCronTool } from "./cron/index.js"; export { createDataTool } from "./data/index.js"; -export { createSessionsListTool } from "./sessions-list.js"; +export { createDelegateTool } from "./delegate.js"; // Tool groups export { From f60551195aaa03aa6b70033897c7edda0ce7b368 Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Sun, 15 Feb 2026 22:08:58 +0800 Subject: [PATCH 2/4] chore(agent): remove old sessions_spawn/sessions_list tools and update references Delete sessions-spawn.ts, sessions-list.ts and their tests. Update CLI to remove waitForSubagents polling workaround (delegate is synchronous). Update UI, desktop IPC, SWE-bench, and system prompt tests to use the new delegate tool name. Co-Authored-By: Claude Opus 4.6 --- apps/cli/src/commands/run.ts | 46 +-- apps/desktop/src/main/ipc/agent.ts | 2 +- .../src/agent/system-prompt/builder.test.ts | 14 +- .../src/agent/system-prompt/sections.test.ts | 12 +- .../src/agent/tools/sessions-list.test.ts | 211 -------------- .../core/src/agent/tools/sessions-list.ts | 273 ------------------ .../src/agent/tools/sessions-spawn.test.ts | 61 ---- .../core/src/agent/tools/sessions-spawn.ts | 220 -------------- packages/ui/src/components/tool-call-item.tsx | 15 +- scripts/swe-bench/run.ts | 2 +- 10 files changed, 24 insertions(+), 832 deletions(-) delete mode 100644 packages/core/src/agent/tools/sessions-list.test.ts delete mode 100644 packages/core/src/agent/tools/sessions-list.ts delete mode 100644 packages/core/src/agent/tools/sessions-spawn.test.ts delete mode 100644 packages/core/src/agent/tools/sessions-spawn.ts diff --git a/apps/cli/src/commands/run.ts b/apps/cli/src/commands/run.ts index d6ee35d7..add43695 100644 --- a/apps/cli/src/commands/run.ts +++ b/apps/cli/src/commands/run.ts @@ -7,7 +7,7 @@ */ import { join } from "node:path"; -import { Agent, Hub, listSubagentRuns } from "@multica/core"; +import { Agent, Hub } from "@multica/core"; import type { AgentOptions } from "@multica/core"; import type { ToolsConfig } from "@multica/core"; import { DATA_DIR } from "@multica/utils"; @@ -238,52 +238,8 @@ export async function runCommand(args: string[]): Promise { console.error(`Error: ${result.error}`); process.exitCode = 1; } - - // Wait for sub-agents to complete and parent to process their results. - // Without this, CLI exits before sub-agent announcements are delivered. - await waitForSubagents(agent); } finally { hub.shutdown(); } } -/** - * Wait for any running sub-agents to complete, then output their findings. - * - * In CLI mode, the parent Agent is not registered with the Hub, so the normal - * announce flow (Hub → writeInternal) can't deliver results. Instead, we poll - * the registry and print findings directly once all sub-agents finish. - * - * Max wait: 30 minutes (matches default sub-agent timeout). - */ -async function waitForSubagents(agent: Agent): Promise { - const MAX_WAIT_MS = 30 * 60 * 1000; - const POLL_INTERVAL_MS = 2000; - const start = Date.now(); - - const allRuns = listSubagentRuns(agent.sessionId); - if (allRuns.length === 0) return; - - // Phase 1: Wait for all sub-agent runs to finish - while (Date.now() - start < MAX_WAIT_MS) { - const runs = listSubagentRuns(agent.sessionId); - const running = runs.filter((r) => !r.endedAt); - if (running.length === 0) break; - console.error(dim(`[waiting for ${running.length} sub-agent(s)...]`)); - await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS)); - } - - // Phase 2: Output sub-agent findings directly (bypasses Hub announce flow) - const completedRuns = listSubagentRuns(agent.sessionId).filter((r) => r.endedAt); - if (completedRuns.length === 0) return; - - console.error(dim(`[${completedRuns.length} sub-agent(s) completed]`)); - - for (const run of completedRuns) { - const displayName = run.label || run.task.slice(0, 60); - const status = run.outcome?.status ?? "unknown"; - const findings = run.findings || "(no output)"; - console.log(`\n--- Sub-agent: ${displayName} [${status}] ---`); - console.log(findings); - } -} diff --git a/apps/desktop/src/main/ipc/agent.ts b/apps/desktop/src/main/ipc/agent.ts index 89f076a9..dc2189f2 100644 --- a/apps/desktop/src/main/ipc/agent.ts +++ b/apps/desktop/src/main/ipc/agent.ts @@ -13,7 +13,7 @@ const TOOL_GROUPS: Record = { 'group:runtime': ['exec', 'process'], 'group:web': ['web_search', 'web_fetch'], 'group:memory': ['memory_search'], - 'group:subagent': ['sessions_spawn'], + 'group:subagent': ['delegate'], 'group:cron': ['cron'], } diff --git a/packages/core/src/agent/system-prompt/builder.test.ts b/packages/core/src/agent/system-prompt/builder.test.ts index 21dce243..ef85cab4 100644 --- a/packages/core/src/agent/system-prompt/builder.test.ts +++ b/packages/core/src/agent/system-prompt/builder.test.ts @@ -11,7 +11,7 @@ const PROFILE = { config: { name: "TestAgent" }, }; -const TOOLS = ["read", "write", "edit", "glob", "exec", "sessions_spawn", "web_search"]; +const TOOLS = ["read", "write", "edit", "glob", "exec", "delegate", "web_search"]; describe("buildSystemPrompt", () => { // ── Full mode ───────────────────────────────────────────────────────── @@ -43,9 +43,9 @@ describe("buildSystemPrompt", () => { expect(result).toContain("## Tool Call Style"); }); - it("full mode includes sub-agents section when sessions_spawn present", () => { - const result = buildSystemPrompt({ mode: "full", tools: ["sessions_spawn"] }); - expect(result).toContain("## Sub-Agents"); + it("full mode includes delegation section when delegate present", () => { + const result = buildSystemPrompt({ mode: "full", tools: ["delegate"] }); + expect(result).toContain("## Delegation"); }); it("full mode includes web access section when web tools present", () => { @@ -131,9 +131,9 @@ describe("buildSystemPrompt", () => { expect(result).not.toContain("## Skills"); }); - it("minimal mode excludes sub-agents section even with sessions_spawn", () => { - const result = buildSystemPrompt({ mode: "minimal", tools: ["sessions_spawn"] }); - expect(result).not.toContain("## Sub-Agents"); + it("minimal mode excludes delegation section even with delegate", () => { + const result = buildSystemPrompt({ mode: "minimal", tools: ["delegate"] }); + expect(result).not.toContain("## Delegation"); }); // ── None mode ───────────────────────────────────────────────────────── diff --git a/packages/core/src/agent/system-prompt/sections.test.ts b/packages/core/src/agent/system-prompt/sections.test.ts index 71de0871..a1d2dd5a 100644 --- a/packages/core/src/agent/system-prompt/sections.test.ts +++ b/packages/core/src/agent/system-prompt/sections.test.ts @@ -167,14 +167,14 @@ describe("buildToolCallStyleSection", () => { }); describe("buildConditionalToolSections", () => { - it("includes sub-agents section when sessions_spawn present in full mode", () => { - const result = buildConditionalToolSections(["sessions_spawn"], "full"); - expect(result.join("\n")).toContain("## Sub-Agents"); + it("includes delegation section when delegate present in full mode", () => { + const result = buildConditionalToolSections(["delegate"], "full"); + expect(result.join("\n")).toContain("## Delegation"); }); - it("excludes sub-agents section in minimal mode", () => { - const result = buildConditionalToolSections(["sessions_spawn"], "minimal"); - expect(result.join("\n")).not.toContain("## Sub-Agents"); + it("excludes delegation section in minimal mode", () => { + const result = buildConditionalToolSections(["delegate"], "minimal"); + expect(result.join("\n")).not.toContain("## Delegation"); }); it("includes web access section when web tools present", () => { diff --git a/packages/core/src/agent/tools/sessions-list.test.ts b/packages/core/src/agent/tools/sessions-list.test.ts deleted file mode 100644 index 637a8537..00000000 --- a/packages/core/src/agent/tools/sessions-list.test.ts +++ /dev/null @@ -1,211 +0,0 @@ -import { describe, it, expect, beforeEach } from "vitest"; -import type { SubagentRunRecord } from "../subagent/types.js"; -import { resetSubagentRegistryForTests, seedSubagentRunForTests } from "../subagent/registry.js"; -import { createSessionsListTool } from "./sessions-list.js"; - -function makeRecord(overrides: Partial = {}): SubagentRunRecord { - return { - runId: "run-001", - childSessionId: "child-001", - requesterSessionId: "parent-001", - task: "Test task", - cleanup: "delete", - createdAt: 1700000000000, - ...overrides, - }; -} - -describe("sessions_list tool", () => { - beforeEach(() => { - resetSubagentRegistryForTests(); - }); - - it("returns empty message when no runs exist", async () => { - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", {}); - - expect(result.content[0]).toEqual({ - type: "text", - text: "No subagent runs for this session.", - }); - expect(result.details).toEqual({ runs: [] }); - }); - - it("lists multiple runs with correct status mapping", async () => { - const now = Date.now(); - seedSubagentRunForTests( - makeRecord({ - runId: "run-aaa", - label: "Code Review", - startedAt: now - 45000, - }), - ); - seedSubagentRunForTests( - makeRecord({ - runId: "run-bbb", - label: "Test Analysis", - startedAt: now - 60000, - endedAt: now - 30000, - outcome: { status: "ok" }, - findings: "All tests passed successfully.", - findingsCaptured: true, - }), - ); - seedSubagentRunForTests( - makeRecord({ - runId: "run-ccc", - label: "Lint Check", - startedAt: now - 60000, - endedAt: now, - outcome: { status: "error", error: "timeout" }, - findings: "Lint check timed out.", - findingsCaptured: true, - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", {}); - - const text = result.content[0]!; - expect(text.type).toBe("text"); - expect((text as { text: string }).text).toContain("3 total"); - expect((text as { text: string }).text).toContain("[RUNNING]"); - expect((text as { text: string }).text).toContain("[OK]"); - expect((text as { text: string }).text).toContain("[ERROR]"); - expect((text as { text: string }).text).toContain("Code Review"); - expect((text as { text: string }).text).toContain("Test Analysis"); - expect((text as { text: string }).text).toContain("Lint Check"); - // Verify full runId is shown for completed runs - expect((text as { text: string }).text).toContain("id:run-aaa"); - expect((text as { text: string }).text).toContain("id:run-bbb"); - expect((text as { text: string }).text).toContain("id:run-ccc"); - // Verify findings are shown for completed runs - expect((text as { text: string }).text).toContain("All tests passed successfully."); - expect((text as { text: string }).text).toContain("Lint check timed out."); - - expect(result.details!.runs).toHaveLength(3); - expect(result.details!.runs[0]!.status).toBe("running"); - expect(result.details!.runs[1]!.status).toBe("ok"); - expect(result.details!.runs[2]!.status).toBe("error"); - }); - - it("returns detail for a specific runId", async () => { - const now = Date.now(); - seedSubagentRunForTests( - makeRecord({ - runId: "run-detail", - label: "Deep Analysis", - task: "Analyze the authentication module thoroughly", - startedAt: now - 90000, - endedAt: now - 10000, - outcome: { status: "ok" }, - findings: "Found 2 potential issues in token validation.", - findingsCaptured: true, - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", { runId: "run-detail" }); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("Run: run-detail"); - expect(text).toContain("Label: Deep Analysis"); - expect(text).toContain("Status: ok"); - expect(text).toContain("Found 2 potential issues"); - expect(text).toContain("Duration:"); - - expect(result.details!.runs).toHaveLength(1); - expect(result.details!.runs[0]!.runId).toBe("run-detail"); - }); - - it("returns not found for unknown runId", async () => { - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", { runId: "nonexistent" }); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("Run not found"); - expect(result.details).toEqual({ runs: [] }); - }); - - it("rejects runId belonging to a different requester", async () => { - seedSubagentRunForTests( - makeRecord({ - runId: "run-other", - requesterSessionId: "other-parent", - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", { runId: "run-other" }); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("Run not found"); - expect(result.details).toEqual({ runs: [] }); - }); - - it("handles missing sessionId gracefully", async () => { - const tool = createSessionsListTool({}); - const result = await tool.execute("call-1", {}); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("No session ID available"); - expect(result.details).toEqual({ runs: [] }); - }); - - it("shows findings for grouped completed runs", async () => { - const now = Date.now(); - const groupId = "group-001"; - seedSubagentRunForTests( - makeRecord({ - runId: "run-g1", - label: "Bull Case Research", - startedAt: now - 60000, - endedAt: now - 10000, - outcome: { status: "ok" }, - findings: "AI infrastructure capex growing 40% YoY.", - findingsCaptured: true, - groupId, - }), - ); - seedSubagentRunForTests( - makeRecord({ - runId: "run-g2", - label: "Bear Case Research", - startedAt: now - 60000, - endedAt: now - 5000, - outcome: { status: "ok" }, - findings: "Valuation risk: forward P/E above historical average.", - findingsCaptured: true, - groupId, - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", {}); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("id:run-g1"); - expect(text).toContain("id:run-g2"); - expect(text).toContain("AI infrastructure capex growing 40% YoY."); - expect(text).toContain("Valuation risk: forward P/E above historical average."); - }); - - it("shows findings status for running task", async () => { - const now = Date.now(); - seedSubagentRunForTests( - makeRecord({ - runId: "run-running", - label: "Still Running", - startedAt: now - 30000, - // no endedAt - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", { runId: "run-running" }); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("Status: running"); - expect(text).toContain("Findings: (still running)"); - }); -}); diff --git a/packages/core/src/agent/tools/sessions-list.ts b/packages/core/src/agent/tools/sessions-list.ts deleted file mode 100644 index 8905b943..00000000 --- a/packages/core/src/agent/tools/sessions-list.ts +++ /dev/null @@ -1,273 +0,0 @@ -/** - * sessions_list tool — allows an agent to view its spawned subagent runs. - * - * Lists all subagent runs for the current session, or shows details for a - * specific run when a runId is provided. - */ - -import { Type } from "@sinclair/typebox"; -import type { AgentTool } from "@mariozechner/pi-agent-core"; -import { listSubagentRuns, getSubagentRun, getSubagentGroup } from "../subagent/registry.js"; -import type { SubagentRunRecord } from "../subagent/types.js"; - -const SessionsListSchema = Type.Object({ - runId: Type.Optional( - Type.String({ description: "Optional run ID to get details for a specific run. If omitted, lists all runs." }), - ), -}); - -type SessionsListArgs = { - runId?: string; -}; - -export type SessionsListResult = { - runs: Array<{ - runId: string; - label?: string | undefined; - task: string; - status: "running" | "ok" | "error" | "timeout" | "unknown"; - startedAt?: number | undefined; - endedAt?: number | undefined; - findings?: string | undefined; - }>; -}; - -export interface CreateSessionsListToolOptions { - /** Session ID of the current (requester) agent */ - sessionId?: string; -} - -function resolveStatus(record: SubagentRunRecord): "running" | "ok" | "error" | "timeout" | "unknown" { - if (!record.endedAt) return "running"; - return record.outcome?.status ?? "unknown"; -} - -function formatElapsed(ms: number): string { - const totalSeconds = Math.round(ms / 1000); - if (totalSeconds < 60) return `${totalSeconds}s`; - const minutes = Math.floor(totalSeconds / 60); - const seconds = totalSeconds % 60; - if (minutes < 60) return seconds > 0 ? `${minutes}m${seconds}s` : `${minutes}m`; - const hours = Math.floor(minutes / 60); - const remainingMinutes = minutes % 60; - return remainingMinutes > 0 ? `${hours}h${remainingMinutes}m` : `${hours}h`; -} - -function formatRunSummary(record: SubagentRunRecord, index: number, now: number): string { - const status = resolveStatus(record); - const displayName = record.label || record.task.slice(0, 60); - const statusTag = `[${status}]`.padEnd(10); - - let timing = ""; - if (status === "running" && record.startedAt) { - timing = `started ${formatElapsed(now - record.startedAt)} ago`; - } else if (record.startedAt && record.endedAt) { - timing = `completed in ${formatElapsed(record.endedAt - record.startedAt)}`; - } - - const parts = [`#${index + 1} ${statusTag} "${displayName}"`]; - if (timing) parts.push(`(${record.runId.slice(0, 8)}…, ${timing})`); - else parts.push(`(${record.runId.slice(0, 8)}…)`); - - return parts.join(" "); -} - -function formatRunDetail(record: SubagentRunRecord, now: number): string { - const status = resolveStatus(record); - const lines: string[] = [ - `Run: ${record.runId}`, - ]; - - if (record.label) lines.push(`Label: ${record.label}`); - if (record.groupId) { - const group = getSubagentGroup(record.groupId); - lines.push(`Group: ${record.groupId}${group?.label ? ` (${group.label})` : ""}`); - if (group?.next) lines.push(`Continuation: ${group.next.slice(0, 120)}${group.next.length > 120 ? "…" : ""}`); - } - lines.push(`Task: ${record.task}`); - lines.push(`Status: ${status}${record.outcome?.error ? ` — ${record.outcome.error}` : ""}`); - lines.push(`Child Session: ${record.childSessionId}`); - lines.push(`Created: ${new Date(record.createdAt).toISOString()} (${formatElapsed(now - record.createdAt)} ago)`); - - if (record.startedAt) { - lines.push(`Started: ${new Date(record.startedAt).toISOString()} (${formatElapsed(now - record.startedAt)} ago)`); - } - if (record.endedAt) { - lines.push(`Ended: ${new Date(record.endedAt).toISOString()}`); - if (record.startedAt) { - lines.push(`Duration: ${formatElapsed(record.endedAt - record.startedAt)}`); - } - } - - if (record.findingsCaptured) { - lines.push(`Findings: ${record.findings || "(no output)"}`); - } else if (record.endedAt) { - lines.push("Findings: (not yet captured)"); - } else { - lines.push("Findings: (still running)"); - } - - if (record.announced) lines.push("Announced: yes"); - - return lines.join("\n"); -} - -function toResultRun(record: SubagentRunRecord) { - return { - runId: record.runId, - label: record.label, - task: record.task, - status: resolveStatus(record), - startedAt: record.startedAt, - endedAt: record.endedAt, - findings: record.findings, - }; -} - -export function createSessionsListTool( - options: CreateSessionsListToolOptions, -): AgentTool { - return { - name: "sessions_list", - label: "List Subagent Runs", - description: - "List all subagent runs spawned by this session and their current status. " + - "Optionally pass a runId to get detailed information about a specific run. " + - "Use this to check subagent progress or when the user asks about status.", - parameters: SessionsListSchema, - execute: async (_toolCallId, args) => { - const { runId } = args as SessionsListArgs; - const requesterSessionId = options.sessionId; - - if (!requesterSessionId) { - return { - content: [{ type: "text", text: "No session ID available. Cannot list subagent runs." }], - details: { runs: [] }, - }; - } - - const now = Date.now(); - - // Detail mode: specific run - if (runId) { - const record = getSubagentRun(runId); - if (!record) { - return { - content: [{ type: "text", text: `Run not found: ${runId}` }], - details: { runs: [] }, - }; - } - if (record.requesterSessionId !== requesterSessionId) { - return { - content: [{ type: "text", text: `Run not found: ${runId}` }], - details: { runs: [] }, - }; - } - return { - content: [{ type: "text", text: formatRunDetail(record, now) }], - details: { runs: [toResultRun(record)] }, - }; - } - - // List mode: all runs for this session - const runs = listSubagentRuns(requesterSessionId); - - if (runs.length === 0) { - return { - content: [{ type: "text", text: "No subagent runs for this session." }], - details: { runs: [] }, - }; - } - - const someRunning = runs.some((r) => !r.endedAt); - - // Build status lines, grouping runs by groupId - const statusLines: string[] = []; - const groupedRuns = new Map(); - const ungroupedRuns: SubagentRunRecord[] = []; - - for (const r of runs) { - if (r.groupId) { - const list = groupedRuns.get(r.groupId) ?? []; - list.push(r); - groupedRuns.set(r.groupId, list); - } else { - ungroupedRuns.push(r); - } - } - - let idx = 0; - - // Grouped runs - for (const [gId, gRuns] of groupedRuns) { - const group = getSubagentGroup(gId); - const groupLabel = group?.label || `Group ${gId.slice(0, 8)}…`; - const done = gRuns.filter(r => r.endedAt).length; - const nextSnippet = group?.next ? ` → next: "${group.next.slice(0, 60)}${group.next.length > 60 ? "…" : ""}"` : ""; - statusLines.push(`\n 📦 ${groupLabel} (${done}/${gRuns.length} done${nextSnippet})`); - - for (const r of gRuns) { - idx++; - const displayName = r.label || r.task.slice(0, 60); - const status = resolveStatus(r); - if (status === "running") { - const elapsed = r.startedAt ? formatElapsed(now - r.startedAt) : "just spawned"; - statusLines.push(` ${idx}. [RUNNING] "${displayName}" (${elapsed}) id:${r.runId}`); - } else { - const elapsed = r.startedAt && r.endedAt ? formatElapsed(r.endedAt - r.startedAt) : ""; - const findings = r.findingsCaptured - ? (r.findings ? r.findings.slice(0, 4000) + (r.findings.length > 4000 ? "…" : "") : "(no output)") - : "(findings not yet captured)"; - statusLines.push(` ${idx}. [${status.toUpperCase()}] "${displayName}" (${elapsed}) id:${r.runId}\n Findings: ${findings}`); - } - } - } - - // Ungrouped runs - for (const r of ungroupedRuns) { - idx++; - const displayName = r.label || r.task.slice(0, 60); - const status = resolveStatus(r); - if (status === "running") { - const elapsed = r.startedAt ? formatElapsed(now - r.startedAt) : "just spawned"; - statusLines.push(` ${idx}. [RUNNING] "${displayName}" (${elapsed}) id:${r.runId}`); - } else { - const elapsed = r.startedAt && r.endedAt ? formatElapsed(r.endedAt - r.startedAt) : ""; - const findings = r.findingsCaptured - ? (r.findings ? r.findings.slice(0, 4000) + (r.findings.length > 4000 ? "…" : "") : "(no output)") - : "(findings not yet captured)"; - statusLines.push(` ${idx}. [${status.toUpperCase()}] "${displayName}" (${elapsed}) id:${r.runId}\n Findings: ${findings}`); - } - } - - const header = `Subagent runs for this session: ${runs.length} total`; - const body = statusLines.join("\n"); - - // If any subagents are still running, return status with wait instruction. - // We do NOT use steer() here — steer would cancel unrelated tool calls - // that the LLM may be processing in the same batch. - if (someRunning) { - const runningCount = runs.filter((r) => !r.endedAt).length; - return { - content: [ - { - type: "text", - text: - header + "\n" + body + "\n\n" + - `STATUS: ${runningCount} subagent(s) still running. This is normal — they need time to complete.\n` + - "ACTION REQUIRED: Do NOT call sessions_list again. Results will be delivered into your context automatically when they finish.\n" + - "Do NOT attempt to do this work yourself — the subagents are handling it.", - }, - ], - details: { runs: runs.map(toResultRun) }, - }; - } - - // All completed — normal response - return { - content: [{ type: "text", text: header + "\n" + body }], - details: { runs: runs.map(toResultRun) }, - }; - }, - }; -} diff --git a/packages/core/src/agent/tools/sessions-spawn.test.ts b/packages/core/src/agent/tools/sessions-spawn.test.ts deleted file mode 100644 index 0a0d80a9..00000000 --- a/packages/core/src/agent/tools/sessions-spawn.test.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { describe, it, expect, beforeEach } from "vitest"; -import { createSessionsSpawnTool } from "./sessions-spawn.js"; -import { getSubagentGroup, resetSubagentRegistryForTests } from "../subagent/registry.js"; - -describe("sessions_spawn tool", () => { - beforeEach(() => { - resetSubagentRegistryForTests(); - }); - it("has correct name and description", () => { - const tool = createSessionsSpawnTool({ isSubagent: false, sessionId: "test-session" }); - expect(tool.name).toBe("sessions_spawn"); - expect(tool.label).toBe("Spawn Subagent"); - expect(tool.description).toContain("Spawn a background subagent"); - }); - - it("rejects spawn from subagent sessions", async () => { - const tool = createSessionsSpawnTool({ isSubagent: true, sessionId: "child-session" }); - - const result = await tool.execute( - "call-1", - { task: "do something" } as any, - new AbortController().signal, - ); - - expect(result.details.status).toBe("error"); - expect(result.details.error).toContain("not allowed from sub-agent sessions"); - const firstContent = result.content[0] as { type: string; text: string }; - expect(firstContent.text).toContain("not allowed"); - }); - - it("auto-creates group when custom groupId is provided", async () => { - const tool = createSessionsSpawnTool({ isSubagent: false, sessionId: "parent-session" }); - - // Should not error — the group is auto-created - await tool.execute( - "call-group", - { task: "research topic", label: "Research", groupId: "my-custom-group" } as any, - new AbortController().signal, - ); - - // Verify group was created in the registry - const group = getSubagentGroup("my-custom-group"); - expect(group).toBeDefined(); - expect(group!.groupId).toBe("my-custom-group"); - expect(group!.label).toBe("Group: Research"); - }); - - it("fails gracefully when Hub is not initialized", async () => { - const tool = createSessionsSpawnTool({ isSubagent: false, sessionId: "parent-session" }); - - const result = await tool.execute( - "call-2", - { task: "analyze code", label: "Code Analysis" } as any, - new AbortController().signal, - ); - - // Should get an error because Hub singleton is not set up in test - expect(result.details.status).toBe("error"); - expect(result.details.error).toContain("Hub"); - }); -}); diff --git a/packages/core/src/agent/tools/sessions-spawn.ts b/packages/core/src/agent/tools/sessions-spawn.ts deleted file mode 100644 index 030fbe27..00000000 --- a/packages/core/src/agent/tools/sessions-spawn.ts +++ /dev/null @@ -1,220 +0,0 @@ -/** - * sessions_spawn tool — allows a parent agent to spawn subagent runs. - * - * Subagents run in isolated sessions with restricted tools. - * Results are announced back to the parent when the child completes. - */ - -import { v7 as uuidv7 } from "uuid"; -import { Type } from "@sinclair/typebox"; -import type { AgentTool } from "@mariozechner/pi-agent-core"; -import { getHub } from "../../hub/hub-singleton.js"; -import { buildSubagentSystemPrompt } from "../subagent/announce.js"; -import { registerSubagentRun, createSubagentGroup, getSubagentGroup } from "../subagent/registry.js"; -import { resolveTools } from "../tools.js"; - -const SessionsSpawnSchema = Type.Object({ - task: Type.String({ description: "The task for the subagent to perform.", minLength: 1 }), - label: Type.Optional( - Type.String({ description: "Human-readable label for this background task." }), - ), - model: Type.Optional( - Type.String({ description: "Override the LLM model for the subagent (e.g. 'gpt-4o', 'claude-sonnet')." }), - ), - cleanup: Type.Optional( - Type.Union([Type.Literal("delete"), Type.Literal("keep")], { - description: "Session cleanup after completion. 'delete' removes session files, 'keep' preserves for audit. Default: 'delete'.", - }), - ), - timeoutSeconds: Type.Optional( - Type.Number({ - description: - "Execution timeout in seconds. Default: 1800 (30 min). " + - "Set to 0 for no timeout (useful for complex, long-running tasks). " + - "The subagent will be terminated if it exceeds this limit.", - minimum: 0, - }), - ), - announce: Type.Optional( - Type.Union([Type.Literal("immediate"), Type.Literal("silent")], { - description: - "Announcement mode. 'immediate' (default): findings delivered as each subagent completes. " + - "'silent': defer all announcements until every silent subagent from this session finishes, " + - "then deliver one combined report. Use 'silent' when spawning multiple subagents to collect " + - "data in parallel and you want to summarize everything at once. " + - "Ignored when groupId is provided (groups always collect all results before announcing).", - }), - ), - groupId: Type.Optional( - Type.String({ - description: - "Join an existing group. Pass the groupId returned by a previous sessions_spawn call " + - "to add this subagent to the same group. All runs in a group are announced together " + - "when the last one completes. If omitted AND 'next' is provided, a new group is created automatically.", - }), - ), - next: Type.Optional( - Type.String({ - description: - "Continuation task to execute after ALL subagents in the group complete. " + - "Only used when creating a new group (first spawn without groupId). " + - "When set, the combined findings from all subagents plus this 'next' prompt " + - "are delivered to you so you can perform follow-up work (e.g. summarize, generate reports, write files). " + - "Setting 'next' automatically creates a group and implies silent collection.", - }), - ), -}); - -type SessionsSpawnArgs = { - task: string; - label?: string; - model?: string; - cleanup?: "delete" | "keep"; - timeoutSeconds?: number; - announce?: "immediate" | "silent"; - groupId?: string; - next?: string; -}; - -export type SessionsSpawnResult = { - status: "accepted" | "error"; - childSessionId?: string; - runId?: string; - groupId?: string; - error?: string; -}; - -export interface CreateSessionsSpawnToolOptions { - /** Whether the current agent is itself a subagent */ - isSubagent?: boolean; - /** Session ID of the current (requester) agent */ - sessionId?: string; - /** Resolved provider ID of the parent agent (inherited by subagents) */ - provider?: string; -} - -export function createSessionsSpawnTool( - options: CreateSessionsSpawnToolOptions, -): AgentTool { - return { - name: "sessions_spawn", - label: "Spawn Subagent", - description: - "Spawn a background subagent to handle a specific task. The subagent runs in an isolated session with its own tool set. " + - "When it completes, its findings are delivered directly into your context automatically. " + - "After spawning, do NOT proceed with work that depends on the results — but you can still chat or do unrelated tasks. " + - "When spawning multiple subagents for a collect-then-act workflow, ALWAYS use the `next` parameter " + - "on the first spawn to define follow-up work, then pass the returned groupId to subsequent spawns. " + - "Use this for parallelizable work, long-running analysis, or tasks that benefit from isolation.", - parameters: SessionsSpawnSchema, - execute: async (_toolCallId, args) => { - const { task, label, model, cleanup = "delete", timeoutSeconds, announce, next } = args as SessionsSpawnArgs; - let { groupId } = args as SessionsSpawnArgs; - - // Guard: subagents cannot spawn subagents - if (options.isSubagent) { - return { - content: [{ type: "text", text: "Error: sessions_spawn is not allowed from sub-agent sessions." }], - details: { - status: "error", - error: "sessions_spawn is not allowed from sub-agent sessions", - }, - }; - } - - const requesterSessionId = options.sessionId ?? "unknown"; - const runId = uuidv7(); - const childSessionId = uuidv7(); - - // Auto-create group when groupId is provided but doesn't exist yet, - // or when `next` is provided without a groupId. - if (groupId) { - const existingGroup = getSubagentGroup(groupId); - if (!existingGroup) { - // LLM provided a custom groupId — auto-create the group - createSubagentGroup({ - groupId, - requesterSessionId, - label: label ? `Group: ${label}` : undefined, - next, - }); - } - } else if (next) { - groupId = uuidv7(); - createSubagentGroup({ - groupId, - requesterSessionId, - label: label ? `Group: ${label}` : undefined, - next, - }); - } - - // Resolve tools for the subagent (with isSubagent=true for policy filtering) - const subagentTools = resolveTools({ isSubagent: true }); - const toolNames = subagentTools.map((t) => t.name); - - // Build system prompt for the child - const systemPrompt = buildSubagentSystemPrompt({ - requesterSessionId, - childSessionId, - label, - task, - tools: toolNames, - }); - - // Spawn child agent via Hub - try { - const hub = getHub(); - const childAgent = hub.createSubagent(childSessionId, { - systemPrompt, - model, - provider: options.provider, - }); - - // Register the run for lifecycle tracking. - // The write is deferred via the start callback so the child only - // begins work once a concurrency slot is available in the queue. - registerSubagentRun({ - runId, - childSessionId, - requesterSessionId, - task, - label, - cleanup, - timeoutSeconds, - announce: groupId ? "silent" : announce, - groupId, - start: () => childAgent.write(task), - }); - - // Build response text - const groupInfo = groupId ? `\nGroup: ${groupId}` : ""; - const nextInfo = next ? `\nContinuation: "${next.slice(0, 100)}${next.length > 100 ? "…" : ""}"` : ""; - const responseText = - `Subagent spawned: ${label || task.slice(0, 80)}\n` + - `Run: ${runId}${groupInfo}${nextInfo}\n\n` + - `⏳ WAITING FOR RESULTS — do NOT proceed with work that depends on these results.\n` + - `Do NOT fabricate data or completion status. Results will arrive in your context automatically.`; - - return { - content: [{ type: "text", text: responseText }], - details: { - status: "accepted", - childSessionId, - runId, - groupId, - }, - }; - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - return { - content: [{ type: "text", text: `Error spawning subagent: ${message}` }], - details: { - status: "error", - error: message, - }, - }; - } - }, - }; -} diff --git a/packages/ui/src/components/tool-call-item.tsx b/packages/ui/src/components/tool-call-item.tsx index 2936714d..51b5279c 100644 --- a/packages/ui/src/components/tool-call-item.tsx +++ b/packages/ui/src/components/tool-call-item.tsx @@ -39,7 +39,7 @@ const TOOL_DISPLAY: Record = { memory_set: { label: "MemorySet", icon: Database }, memory_delete: { label: "MemoryDelete", icon: Database }, memory_list: { label: "MemoryList", icon: Database }, - sessions_spawn: { label: "SpawnSession", icon: GitBranch }, + delegate: { label: "Delegate", icon: GitBranch }, data: { label: "Data", icon: BarChart3 }, } @@ -81,11 +81,12 @@ function getSubtitle(toolName: string, args?: Record): string { const ticker = params?.ticker ? String(params.ticker).toUpperCase() : "" return ticker ? `${action} ${ticker}` : action } - case "sessions_spawn": { - const label = args.label ? String(args.label) : "" - if (label) return label.length > 60 ? label.slice(0, 57) + "…" : label - const task = String(args.task ?? "") - return task.length > 60 ? task.slice(0, 57) + "…" : task + case "delegate": { + const tasks = args.tasks as Array<{ label?: string; task?: string }> | undefined + if (!tasks?.length) return "" + const labels = tasks.map((t, i) => t.label || `Task ${i + 1}`) + const summary = labels.join(", ") + return summary.length > 60 ? summary.slice(0, 57) + "…" : summary } default: return "" @@ -106,7 +107,7 @@ const RUNNING_LABELS: Record = { web_search: "searching…", web_fetch: "fetching…", data: "fetching…", - sessions_spawn: "spawning…", + delegate: "delegating…", } /** Stats derived from tool result content */ diff --git a/scripts/swe-bench/run.ts b/scripts/swe-bench/run.ts index 5e6c4b40..e35efa72 100644 --- a/scripts/swe-bench/run.ts +++ b/scripts/swe-bench/run.ts @@ -217,7 +217,7 @@ async function runTask( enableSkills: false, tools: { // Only allow coding tools — no web, no cron, no sessions - deny: ["web_fetch", "web_search", "cron", "data", "sessions_spawn", "sessions_list", "memory_search", "send_file"], + deny: ["web_fetch", "web_search", "cron", "data", "delegate", "memory_search", "send_file"], }, }; From aada2916f46bbaa3134e3c6fd52acba3ae6b649c Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Mon, 16 Feb 2026 00:58:23 +0800 Subject: [PATCH 3/4] fix(agent): clear timeout timer in delegate tool to prevent unhandled rejection The setTimeout in runSubagentTask was never cleared when childAgent.run() completed before the timeout. The dangling timer would later reject an unobserved promise, causing an unhandled promise rejection crash in Node.js v15+. Capture the timer and clear it in a .finally() block. Co-Authored-By: Claude Opus 4.6 --- packages/core/src/agent/tools/delegate.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/packages/core/src/agent/tools/delegate.ts b/packages/core/src/agent/tools/delegate.ts index 9c414006..db3e2907 100644 --- a/packages/core/src/agent/tools/delegate.ts +++ b/packages/core/src/agent/tools/delegate.ts @@ -133,10 +133,11 @@ async function runSubagentTask( if (timeoutMs > 0) { // Race agent.run against timeout + let timer: ReturnType; result = await Promise.race([ childAgent.run(taskDef.task), new Promise((_, reject) => { - setTimeout(() => { + timer = setTimeout(() => { timedOut = true; childAgent.abort(); reject(new Error("timeout")); @@ -147,6 +148,8 @@ async function runSubagentTask( return { text: "", error: `Timed out after ${formatElapsed(timeoutMs)}` }; } throw err; + }).finally(() => { + clearTimeout(timer); }); } else { // No timeout From 9c8be30d3d26dcb9c74dcce9db1a933cc864994c Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Mon, 16 Feb 2026 01:10:18 +0800 Subject: [PATCH 4/4] fix(test): increase timeout for summary fallback artifact extraction test UC4 test times out in CI (5s default) because generateSummary's API provider layer takes longer to fail on slow CI runners. Increase to 15s. Co-Authored-By: Claude Opus 4.6 --- .../agent/context-window/artifact-pruning-integration.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts b/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts index 152aae4d..c301252a 100644 --- a/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts +++ b/packages/core/src/agent/context-window/artifact-pruning-integration.test.ts @@ -185,7 +185,7 @@ describe("Phase 2 E2E: Artifact-Aware Pruning", () => { describe("Phase 2 E2E: Summary Fallback Artifact Extraction", () => { // UC4: summary fallback extracts artifact references - it("UC4: summary fallback includes 'Saved Artifacts' section with all artifact refs", async () => { + it("UC4: summary fallback includes 'Saved Artifacts' section with all artifact refs", { timeout: 15_000 }, async () => { const mod = await import("./summary-fallback.js"); const messages: AgentMessage[] = [