From f60551195aaa03aa6b70033897c7edda0ce7b368 Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Sun, 15 Feb 2026 22:08:58 +0800 Subject: [PATCH] chore(agent): remove old sessions_spawn/sessions_list tools and update references Delete sessions-spawn.ts, sessions-list.ts and their tests. Update CLI to remove waitForSubagents polling workaround (delegate is synchronous). Update UI, desktop IPC, SWE-bench, and system prompt tests to use the new delegate tool name. Co-Authored-By: Claude Opus 4.6 --- apps/cli/src/commands/run.ts | 46 +-- apps/desktop/src/main/ipc/agent.ts | 2 +- .../src/agent/system-prompt/builder.test.ts | 14 +- .../src/agent/system-prompt/sections.test.ts | 12 +- .../src/agent/tools/sessions-list.test.ts | 211 -------------- .../core/src/agent/tools/sessions-list.ts | 273 ------------------ .../src/agent/tools/sessions-spawn.test.ts | 61 ---- .../core/src/agent/tools/sessions-spawn.ts | 220 -------------- packages/ui/src/components/tool-call-item.tsx | 15 +- scripts/swe-bench/run.ts | 2 +- 10 files changed, 24 insertions(+), 832 deletions(-) delete mode 100644 packages/core/src/agent/tools/sessions-list.test.ts delete mode 100644 packages/core/src/agent/tools/sessions-list.ts delete mode 100644 packages/core/src/agent/tools/sessions-spawn.test.ts delete mode 100644 packages/core/src/agent/tools/sessions-spawn.ts diff --git a/apps/cli/src/commands/run.ts b/apps/cli/src/commands/run.ts index d6ee35d7..add43695 100644 --- a/apps/cli/src/commands/run.ts +++ b/apps/cli/src/commands/run.ts @@ -7,7 +7,7 @@ */ import { join } from "node:path"; -import { Agent, Hub, listSubagentRuns } from "@multica/core"; +import { Agent, Hub } from "@multica/core"; import type { AgentOptions } from "@multica/core"; import type { ToolsConfig } from "@multica/core"; import { DATA_DIR } from "@multica/utils"; @@ -238,52 +238,8 @@ export async function runCommand(args: string[]): Promise { console.error(`Error: ${result.error}`); process.exitCode = 1; } - - // Wait for sub-agents to complete and parent to process their results. - // Without this, CLI exits before sub-agent announcements are delivered. - await waitForSubagents(agent); } finally { hub.shutdown(); } } -/** - * Wait for any running sub-agents to complete, then output their findings. - * - * In CLI mode, the parent Agent is not registered with the Hub, so the normal - * announce flow (Hub → writeInternal) can't deliver results. Instead, we poll - * the registry and print findings directly once all sub-agents finish. - * - * Max wait: 30 minutes (matches default sub-agent timeout). - */ -async function waitForSubagents(agent: Agent): Promise { - const MAX_WAIT_MS = 30 * 60 * 1000; - const POLL_INTERVAL_MS = 2000; - const start = Date.now(); - - const allRuns = listSubagentRuns(agent.sessionId); - if (allRuns.length === 0) return; - - // Phase 1: Wait for all sub-agent runs to finish - while (Date.now() - start < MAX_WAIT_MS) { - const runs = listSubagentRuns(agent.sessionId); - const running = runs.filter((r) => !r.endedAt); - if (running.length === 0) break; - console.error(dim(`[waiting for ${running.length} sub-agent(s)...]`)); - await new Promise((r) => setTimeout(r, POLL_INTERVAL_MS)); - } - - // Phase 2: Output sub-agent findings directly (bypasses Hub announce flow) - const completedRuns = listSubagentRuns(agent.sessionId).filter((r) => r.endedAt); - if (completedRuns.length === 0) return; - - console.error(dim(`[${completedRuns.length} sub-agent(s) completed]`)); - - for (const run of completedRuns) { - const displayName = run.label || run.task.slice(0, 60); - const status = run.outcome?.status ?? "unknown"; - const findings = run.findings || "(no output)"; - console.log(`\n--- Sub-agent: ${displayName} [${status}] ---`); - console.log(findings); - } -} diff --git a/apps/desktop/src/main/ipc/agent.ts b/apps/desktop/src/main/ipc/agent.ts index 89f076a9..dc2189f2 100644 --- a/apps/desktop/src/main/ipc/agent.ts +++ b/apps/desktop/src/main/ipc/agent.ts @@ -13,7 +13,7 @@ const TOOL_GROUPS: Record = { 'group:runtime': ['exec', 'process'], 'group:web': ['web_search', 'web_fetch'], 'group:memory': ['memory_search'], - 'group:subagent': ['sessions_spawn'], + 'group:subagent': ['delegate'], 'group:cron': ['cron'], } diff --git a/packages/core/src/agent/system-prompt/builder.test.ts b/packages/core/src/agent/system-prompt/builder.test.ts index 21dce243..ef85cab4 100644 --- a/packages/core/src/agent/system-prompt/builder.test.ts +++ b/packages/core/src/agent/system-prompt/builder.test.ts @@ -11,7 +11,7 @@ const PROFILE = { config: { name: "TestAgent" }, }; -const TOOLS = ["read", "write", "edit", "glob", "exec", "sessions_spawn", "web_search"]; +const TOOLS = ["read", "write", "edit", "glob", "exec", "delegate", "web_search"]; describe("buildSystemPrompt", () => { // ── Full mode ───────────────────────────────────────────────────────── @@ -43,9 +43,9 @@ describe("buildSystemPrompt", () => { expect(result).toContain("## Tool Call Style"); }); - it("full mode includes sub-agents section when sessions_spawn present", () => { - const result = buildSystemPrompt({ mode: "full", tools: ["sessions_spawn"] }); - expect(result).toContain("## Sub-Agents"); + it("full mode includes delegation section when delegate present", () => { + const result = buildSystemPrompt({ mode: "full", tools: ["delegate"] }); + expect(result).toContain("## Delegation"); }); it("full mode includes web access section when web tools present", () => { @@ -131,9 +131,9 @@ describe("buildSystemPrompt", () => { expect(result).not.toContain("## Skills"); }); - it("minimal mode excludes sub-agents section even with sessions_spawn", () => { - const result = buildSystemPrompt({ mode: "minimal", tools: ["sessions_spawn"] }); - expect(result).not.toContain("## Sub-Agents"); + it("minimal mode excludes delegation section even with delegate", () => { + const result = buildSystemPrompt({ mode: "minimal", tools: ["delegate"] }); + expect(result).not.toContain("## Delegation"); }); // ── None mode ───────────────────────────────────────────────────────── diff --git a/packages/core/src/agent/system-prompt/sections.test.ts b/packages/core/src/agent/system-prompt/sections.test.ts index 71de0871..a1d2dd5a 100644 --- a/packages/core/src/agent/system-prompt/sections.test.ts +++ b/packages/core/src/agent/system-prompt/sections.test.ts @@ -167,14 +167,14 @@ describe("buildToolCallStyleSection", () => { }); describe("buildConditionalToolSections", () => { - it("includes sub-agents section when sessions_spawn present in full mode", () => { - const result = buildConditionalToolSections(["sessions_spawn"], "full"); - expect(result.join("\n")).toContain("## Sub-Agents"); + it("includes delegation section when delegate present in full mode", () => { + const result = buildConditionalToolSections(["delegate"], "full"); + expect(result.join("\n")).toContain("## Delegation"); }); - it("excludes sub-agents section in minimal mode", () => { - const result = buildConditionalToolSections(["sessions_spawn"], "minimal"); - expect(result.join("\n")).not.toContain("## Sub-Agents"); + it("excludes delegation section in minimal mode", () => { + const result = buildConditionalToolSections(["delegate"], "minimal"); + expect(result.join("\n")).not.toContain("## Delegation"); }); it("includes web access section when web tools present", () => { diff --git a/packages/core/src/agent/tools/sessions-list.test.ts b/packages/core/src/agent/tools/sessions-list.test.ts deleted file mode 100644 index 637a8537..00000000 --- a/packages/core/src/agent/tools/sessions-list.test.ts +++ /dev/null @@ -1,211 +0,0 @@ -import { describe, it, expect, beforeEach } from "vitest"; -import type { SubagentRunRecord } from "../subagent/types.js"; -import { resetSubagentRegistryForTests, seedSubagentRunForTests } from "../subagent/registry.js"; -import { createSessionsListTool } from "./sessions-list.js"; - -function makeRecord(overrides: Partial = {}): SubagentRunRecord { - return { - runId: "run-001", - childSessionId: "child-001", - requesterSessionId: "parent-001", - task: "Test task", - cleanup: "delete", - createdAt: 1700000000000, - ...overrides, - }; -} - -describe("sessions_list tool", () => { - beforeEach(() => { - resetSubagentRegistryForTests(); - }); - - it("returns empty message when no runs exist", async () => { - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", {}); - - expect(result.content[0]).toEqual({ - type: "text", - text: "No subagent runs for this session.", - }); - expect(result.details).toEqual({ runs: [] }); - }); - - it("lists multiple runs with correct status mapping", async () => { - const now = Date.now(); - seedSubagentRunForTests( - makeRecord({ - runId: "run-aaa", - label: "Code Review", - startedAt: now - 45000, - }), - ); - seedSubagentRunForTests( - makeRecord({ - runId: "run-bbb", - label: "Test Analysis", - startedAt: now - 60000, - endedAt: now - 30000, - outcome: { status: "ok" }, - findings: "All tests passed successfully.", - findingsCaptured: true, - }), - ); - seedSubagentRunForTests( - makeRecord({ - runId: "run-ccc", - label: "Lint Check", - startedAt: now - 60000, - endedAt: now, - outcome: { status: "error", error: "timeout" }, - findings: "Lint check timed out.", - findingsCaptured: true, - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", {}); - - const text = result.content[0]!; - expect(text.type).toBe("text"); - expect((text as { text: string }).text).toContain("3 total"); - expect((text as { text: string }).text).toContain("[RUNNING]"); - expect((text as { text: string }).text).toContain("[OK]"); - expect((text as { text: string }).text).toContain("[ERROR]"); - expect((text as { text: string }).text).toContain("Code Review"); - expect((text as { text: string }).text).toContain("Test Analysis"); - expect((text as { text: string }).text).toContain("Lint Check"); - // Verify full runId is shown for completed runs - expect((text as { text: string }).text).toContain("id:run-aaa"); - expect((text as { text: string }).text).toContain("id:run-bbb"); - expect((text as { text: string }).text).toContain("id:run-ccc"); - // Verify findings are shown for completed runs - expect((text as { text: string }).text).toContain("All tests passed successfully."); - expect((text as { text: string }).text).toContain("Lint check timed out."); - - expect(result.details!.runs).toHaveLength(3); - expect(result.details!.runs[0]!.status).toBe("running"); - expect(result.details!.runs[1]!.status).toBe("ok"); - expect(result.details!.runs[2]!.status).toBe("error"); - }); - - it("returns detail for a specific runId", async () => { - const now = Date.now(); - seedSubagentRunForTests( - makeRecord({ - runId: "run-detail", - label: "Deep Analysis", - task: "Analyze the authentication module thoroughly", - startedAt: now - 90000, - endedAt: now - 10000, - outcome: { status: "ok" }, - findings: "Found 2 potential issues in token validation.", - findingsCaptured: true, - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", { runId: "run-detail" }); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("Run: run-detail"); - expect(text).toContain("Label: Deep Analysis"); - expect(text).toContain("Status: ok"); - expect(text).toContain("Found 2 potential issues"); - expect(text).toContain("Duration:"); - - expect(result.details!.runs).toHaveLength(1); - expect(result.details!.runs[0]!.runId).toBe("run-detail"); - }); - - it("returns not found for unknown runId", async () => { - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", { runId: "nonexistent" }); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("Run not found"); - expect(result.details).toEqual({ runs: [] }); - }); - - it("rejects runId belonging to a different requester", async () => { - seedSubagentRunForTests( - makeRecord({ - runId: "run-other", - requesterSessionId: "other-parent", - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", { runId: "run-other" }); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("Run not found"); - expect(result.details).toEqual({ runs: [] }); - }); - - it("handles missing sessionId gracefully", async () => { - const tool = createSessionsListTool({}); - const result = await tool.execute("call-1", {}); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("No session ID available"); - expect(result.details).toEqual({ runs: [] }); - }); - - it("shows findings for grouped completed runs", async () => { - const now = Date.now(); - const groupId = "group-001"; - seedSubagentRunForTests( - makeRecord({ - runId: "run-g1", - label: "Bull Case Research", - startedAt: now - 60000, - endedAt: now - 10000, - outcome: { status: "ok" }, - findings: "AI infrastructure capex growing 40% YoY.", - findingsCaptured: true, - groupId, - }), - ); - seedSubagentRunForTests( - makeRecord({ - runId: "run-g2", - label: "Bear Case Research", - startedAt: now - 60000, - endedAt: now - 5000, - outcome: { status: "ok" }, - findings: "Valuation risk: forward P/E above historical average.", - findingsCaptured: true, - groupId, - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", {}); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("id:run-g1"); - expect(text).toContain("id:run-g2"); - expect(text).toContain("AI infrastructure capex growing 40% YoY."); - expect(text).toContain("Valuation risk: forward P/E above historical average."); - }); - - it("shows findings status for running task", async () => { - const now = Date.now(); - seedSubagentRunForTests( - makeRecord({ - runId: "run-running", - label: "Still Running", - startedAt: now - 30000, - // no endedAt - }), - ); - - const tool = createSessionsListTool({ sessionId: "parent-001" }); - const result = await tool.execute("call-1", { runId: "run-running" }); - - const text = (result.content[0] as { text: string }).text; - expect(text).toContain("Status: running"); - expect(text).toContain("Findings: (still running)"); - }); -}); diff --git a/packages/core/src/agent/tools/sessions-list.ts b/packages/core/src/agent/tools/sessions-list.ts deleted file mode 100644 index 8905b943..00000000 --- a/packages/core/src/agent/tools/sessions-list.ts +++ /dev/null @@ -1,273 +0,0 @@ -/** - * sessions_list tool — allows an agent to view its spawned subagent runs. - * - * Lists all subagent runs for the current session, or shows details for a - * specific run when a runId is provided. - */ - -import { Type } from "@sinclair/typebox"; -import type { AgentTool } from "@mariozechner/pi-agent-core"; -import { listSubagentRuns, getSubagentRun, getSubagentGroup } from "../subagent/registry.js"; -import type { SubagentRunRecord } from "../subagent/types.js"; - -const SessionsListSchema = Type.Object({ - runId: Type.Optional( - Type.String({ description: "Optional run ID to get details for a specific run. If omitted, lists all runs." }), - ), -}); - -type SessionsListArgs = { - runId?: string; -}; - -export type SessionsListResult = { - runs: Array<{ - runId: string; - label?: string | undefined; - task: string; - status: "running" | "ok" | "error" | "timeout" | "unknown"; - startedAt?: number | undefined; - endedAt?: number | undefined; - findings?: string | undefined; - }>; -}; - -export interface CreateSessionsListToolOptions { - /** Session ID of the current (requester) agent */ - sessionId?: string; -} - -function resolveStatus(record: SubagentRunRecord): "running" | "ok" | "error" | "timeout" | "unknown" { - if (!record.endedAt) return "running"; - return record.outcome?.status ?? "unknown"; -} - -function formatElapsed(ms: number): string { - const totalSeconds = Math.round(ms / 1000); - if (totalSeconds < 60) return `${totalSeconds}s`; - const minutes = Math.floor(totalSeconds / 60); - const seconds = totalSeconds % 60; - if (minutes < 60) return seconds > 0 ? `${minutes}m${seconds}s` : `${minutes}m`; - const hours = Math.floor(minutes / 60); - const remainingMinutes = minutes % 60; - return remainingMinutes > 0 ? `${hours}h${remainingMinutes}m` : `${hours}h`; -} - -function formatRunSummary(record: SubagentRunRecord, index: number, now: number): string { - const status = resolveStatus(record); - const displayName = record.label || record.task.slice(0, 60); - const statusTag = `[${status}]`.padEnd(10); - - let timing = ""; - if (status === "running" && record.startedAt) { - timing = `started ${formatElapsed(now - record.startedAt)} ago`; - } else if (record.startedAt && record.endedAt) { - timing = `completed in ${formatElapsed(record.endedAt - record.startedAt)}`; - } - - const parts = [`#${index + 1} ${statusTag} "${displayName}"`]; - if (timing) parts.push(`(${record.runId.slice(0, 8)}…, ${timing})`); - else parts.push(`(${record.runId.slice(0, 8)}…)`); - - return parts.join(" "); -} - -function formatRunDetail(record: SubagentRunRecord, now: number): string { - const status = resolveStatus(record); - const lines: string[] = [ - `Run: ${record.runId}`, - ]; - - if (record.label) lines.push(`Label: ${record.label}`); - if (record.groupId) { - const group = getSubagentGroup(record.groupId); - lines.push(`Group: ${record.groupId}${group?.label ? ` (${group.label})` : ""}`); - if (group?.next) lines.push(`Continuation: ${group.next.slice(0, 120)}${group.next.length > 120 ? "…" : ""}`); - } - lines.push(`Task: ${record.task}`); - lines.push(`Status: ${status}${record.outcome?.error ? ` — ${record.outcome.error}` : ""}`); - lines.push(`Child Session: ${record.childSessionId}`); - lines.push(`Created: ${new Date(record.createdAt).toISOString()} (${formatElapsed(now - record.createdAt)} ago)`); - - if (record.startedAt) { - lines.push(`Started: ${new Date(record.startedAt).toISOString()} (${formatElapsed(now - record.startedAt)} ago)`); - } - if (record.endedAt) { - lines.push(`Ended: ${new Date(record.endedAt).toISOString()}`); - if (record.startedAt) { - lines.push(`Duration: ${formatElapsed(record.endedAt - record.startedAt)}`); - } - } - - if (record.findingsCaptured) { - lines.push(`Findings: ${record.findings || "(no output)"}`); - } else if (record.endedAt) { - lines.push("Findings: (not yet captured)"); - } else { - lines.push("Findings: (still running)"); - } - - if (record.announced) lines.push("Announced: yes"); - - return lines.join("\n"); -} - -function toResultRun(record: SubagentRunRecord) { - return { - runId: record.runId, - label: record.label, - task: record.task, - status: resolveStatus(record), - startedAt: record.startedAt, - endedAt: record.endedAt, - findings: record.findings, - }; -} - -export function createSessionsListTool( - options: CreateSessionsListToolOptions, -): AgentTool { - return { - name: "sessions_list", - label: "List Subagent Runs", - description: - "List all subagent runs spawned by this session and their current status. " + - "Optionally pass a runId to get detailed information about a specific run. " + - "Use this to check subagent progress or when the user asks about status.", - parameters: SessionsListSchema, - execute: async (_toolCallId, args) => { - const { runId } = args as SessionsListArgs; - const requesterSessionId = options.sessionId; - - if (!requesterSessionId) { - return { - content: [{ type: "text", text: "No session ID available. Cannot list subagent runs." }], - details: { runs: [] }, - }; - } - - const now = Date.now(); - - // Detail mode: specific run - if (runId) { - const record = getSubagentRun(runId); - if (!record) { - return { - content: [{ type: "text", text: `Run not found: ${runId}` }], - details: { runs: [] }, - }; - } - if (record.requesterSessionId !== requesterSessionId) { - return { - content: [{ type: "text", text: `Run not found: ${runId}` }], - details: { runs: [] }, - }; - } - return { - content: [{ type: "text", text: formatRunDetail(record, now) }], - details: { runs: [toResultRun(record)] }, - }; - } - - // List mode: all runs for this session - const runs = listSubagentRuns(requesterSessionId); - - if (runs.length === 0) { - return { - content: [{ type: "text", text: "No subagent runs for this session." }], - details: { runs: [] }, - }; - } - - const someRunning = runs.some((r) => !r.endedAt); - - // Build status lines, grouping runs by groupId - const statusLines: string[] = []; - const groupedRuns = new Map(); - const ungroupedRuns: SubagentRunRecord[] = []; - - for (const r of runs) { - if (r.groupId) { - const list = groupedRuns.get(r.groupId) ?? []; - list.push(r); - groupedRuns.set(r.groupId, list); - } else { - ungroupedRuns.push(r); - } - } - - let idx = 0; - - // Grouped runs - for (const [gId, gRuns] of groupedRuns) { - const group = getSubagentGroup(gId); - const groupLabel = group?.label || `Group ${gId.slice(0, 8)}…`; - const done = gRuns.filter(r => r.endedAt).length; - const nextSnippet = group?.next ? ` → next: "${group.next.slice(0, 60)}${group.next.length > 60 ? "…" : ""}"` : ""; - statusLines.push(`\n 📦 ${groupLabel} (${done}/${gRuns.length} done${nextSnippet})`); - - for (const r of gRuns) { - idx++; - const displayName = r.label || r.task.slice(0, 60); - const status = resolveStatus(r); - if (status === "running") { - const elapsed = r.startedAt ? formatElapsed(now - r.startedAt) : "just spawned"; - statusLines.push(` ${idx}. [RUNNING] "${displayName}" (${elapsed}) id:${r.runId}`); - } else { - const elapsed = r.startedAt && r.endedAt ? formatElapsed(r.endedAt - r.startedAt) : ""; - const findings = r.findingsCaptured - ? (r.findings ? r.findings.slice(0, 4000) + (r.findings.length > 4000 ? "…" : "") : "(no output)") - : "(findings not yet captured)"; - statusLines.push(` ${idx}. [${status.toUpperCase()}] "${displayName}" (${elapsed}) id:${r.runId}\n Findings: ${findings}`); - } - } - } - - // Ungrouped runs - for (const r of ungroupedRuns) { - idx++; - const displayName = r.label || r.task.slice(0, 60); - const status = resolveStatus(r); - if (status === "running") { - const elapsed = r.startedAt ? formatElapsed(now - r.startedAt) : "just spawned"; - statusLines.push(` ${idx}. [RUNNING] "${displayName}" (${elapsed}) id:${r.runId}`); - } else { - const elapsed = r.startedAt && r.endedAt ? formatElapsed(r.endedAt - r.startedAt) : ""; - const findings = r.findingsCaptured - ? (r.findings ? r.findings.slice(0, 4000) + (r.findings.length > 4000 ? "…" : "") : "(no output)") - : "(findings not yet captured)"; - statusLines.push(` ${idx}. [${status.toUpperCase()}] "${displayName}" (${elapsed}) id:${r.runId}\n Findings: ${findings}`); - } - } - - const header = `Subagent runs for this session: ${runs.length} total`; - const body = statusLines.join("\n"); - - // If any subagents are still running, return status with wait instruction. - // We do NOT use steer() here — steer would cancel unrelated tool calls - // that the LLM may be processing in the same batch. - if (someRunning) { - const runningCount = runs.filter((r) => !r.endedAt).length; - return { - content: [ - { - type: "text", - text: - header + "\n" + body + "\n\n" + - `STATUS: ${runningCount} subagent(s) still running. This is normal — they need time to complete.\n` + - "ACTION REQUIRED: Do NOT call sessions_list again. Results will be delivered into your context automatically when they finish.\n" + - "Do NOT attempt to do this work yourself — the subagents are handling it.", - }, - ], - details: { runs: runs.map(toResultRun) }, - }; - } - - // All completed — normal response - return { - content: [{ type: "text", text: header + "\n" + body }], - details: { runs: runs.map(toResultRun) }, - }; - }, - }; -} diff --git a/packages/core/src/agent/tools/sessions-spawn.test.ts b/packages/core/src/agent/tools/sessions-spawn.test.ts deleted file mode 100644 index 0a0d80a9..00000000 --- a/packages/core/src/agent/tools/sessions-spawn.test.ts +++ /dev/null @@ -1,61 +0,0 @@ -import { describe, it, expect, beforeEach } from "vitest"; -import { createSessionsSpawnTool } from "./sessions-spawn.js"; -import { getSubagentGroup, resetSubagentRegistryForTests } from "../subagent/registry.js"; - -describe("sessions_spawn tool", () => { - beforeEach(() => { - resetSubagentRegistryForTests(); - }); - it("has correct name and description", () => { - const tool = createSessionsSpawnTool({ isSubagent: false, sessionId: "test-session" }); - expect(tool.name).toBe("sessions_spawn"); - expect(tool.label).toBe("Spawn Subagent"); - expect(tool.description).toContain("Spawn a background subagent"); - }); - - it("rejects spawn from subagent sessions", async () => { - const tool = createSessionsSpawnTool({ isSubagent: true, sessionId: "child-session" }); - - const result = await tool.execute( - "call-1", - { task: "do something" } as any, - new AbortController().signal, - ); - - expect(result.details.status).toBe("error"); - expect(result.details.error).toContain("not allowed from sub-agent sessions"); - const firstContent = result.content[0] as { type: string; text: string }; - expect(firstContent.text).toContain("not allowed"); - }); - - it("auto-creates group when custom groupId is provided", async () => { - const tool = createSessionsSpawnTool({ isSubagent: false, sessionId: "parent-session" }); - - // Should not error — the group is auto-created - await tool.execute( - "call-group", - { task: "research topic", label: "Research", groupId: "my-custom-group" } as any, - new AbortController().signal, - ); - - // Verify group was created in the registry - const group = getSubagentGroup("my-custom-group"); - expect(group).toBeDefined(); - expect(group!.groupId).toBe("my-custom-group"); - expect(group!.label).toBe("Group: Research"); - }); - - it("fails gracefully when Hub is not initialized", async () => { - const tool = createSessionsSpawnTool({ isSubagent: false, sessionId: "parent-session" }); - - const result = await tool.execute( - "call-2", - { task: "analyze code", label: "Code Analysis" } as any, - new AbortController().signal, - ); - - // Should get an error because Hub singleton is not set up in test - expect(result.details.status).toBe("error"); - expect(result.details.error).toContain("Hub"); - }); -}); diff --git a/packages/core/src/agent/tools/sessions-spawn.ts b/packages/core/src/agent/tools/sessions-spawn.ts deleted file mode 100644 index 030fbe27..00000000 --- a/packages/core/src/agent/tools/sessions-spawn.ts +++ /dev/null @@ -1,220 +0,0 @@ -/** - * sessions_spawn tool — allows a parent agent to spawn subagent runs. - * - * Subagents run in isolated sessions with restricted tools. - * Results are announced back to the parent when the child completes. - */ - -import { v7 as uuidv7 } from "uuid"; -import { Type } from "@sinclair/typebox"; -import type { AgentTool } from "@mariozechner/pi-agent-core"; -import { getHub } from "../../hub/hub-singleton.js"; -import { buildSubagentSystemPrompt } from "../subagent/announce.js"; -import { registerSubagentRun, createSubagentGroup, getSubagentGroup } from "../subagent/registry.js"; -import { resolveTools } from "../tools.js"; - -const SessionsSpawnSchema = Type.Object({ - task: Type.String({ description: "The task for the subagent to perform.", minLength: 1 }), - label: Type.Optional( - Type.String({ description: "Human-readable label for this background task." }), - ), - model: Type.Optional( - Type.String({ description: "Override the LLM model for the subagent (e.g. 'gpt-4o', 'claude-sonnet')." }), - ), - cleanup: Type.Optional( - Type.Union([Type.Literal("delete"), Type.Literal("keep")], { - description: "Session cleanup after completion. 'delete' removes session files, 'keep' preserves for audit. Default: 'delete'.", - }), - ), - timeoutSeconds: Type.Optional( - Type.Number({ - description: - "Execution timeout in seconds. Default: 1800 (30 min). " + - "Set to 0 for no timeout (useful for complex, long-running tasks). " + - "The subagent will be terminated if it exceeds this limit.", - minimum: 0, - }), - ), - announce: Type.Optional( - Type.Union([Type.Literal("immediate"), Type.Literal("silent")], { - description: - "Announcement mode. 'immediate' (default): findings delivered as each subagent completes. " + - "'silent': defer all announcements until every silent subagent from this session finishes, " + - "then deliver one combined report. Use 'silent' when spawning multiple subagents to collect " + - "data in parallel and you want to summarize everything at once. " + - "Ignored when groupId is provided (groups always collect all results before announcing).", - }), - ), - groupId: Type.Optional( - Type.String({ - description: - "Join an existing group. Pass the groupId returned by a previous sessions_spawn call " + - "to add this subagent to the same group. All runs in a group are announced together " + - "when the last one completes. If omitted AND 'next' is provided, a new group is created automatically.", - }), - ), - next: Type.Optional( - Type.String({ - description: - "Continuation task to execute after ALL subagents in the group complete. " + - "Only used when creating a new group (first spawn without groupId). " + - "When set, the combined findings from all subagents plus this 'next' prompt " + - "are delivered to you so you can perform follow-up work (e.g. summarize, generate reports, write files). " + - "Setting 'next' automatically creates a group and implies silent collection.", - }), - ), -}); - -type SessionsSpawnArgs = { - task: string; - label?: string; - model?: string; - cleanup?: "delete" | "keep"; - timeoutSeconds?: number; - announce?: "immediate" | "silent"; - groupId?: string; - next?: string; -}; - -export type SessionsSpawnResult = { - status: "accepted" | "error"; - childSessionId?: string; - runId?: string; - groupId?: string; - error?: string; -}; - -export interface CreateSessionsSpawnToolOptions { - /** Whether the current agent is itself a subagent */ - isSubagent?: boolean; - /** Session ID of the current (requester) agent */ - sessionId?: string; - /** Resolved provider ID of the parent agent (inherited by subagents) */ - provider?: string; -} - -export function createSessionsSpawnTool( - options: CreateSessionsSpawnToolOptions, -): AgentTool { - return { - name: "sessions_spawn", - label: "Spawn Subagent", - description: - "Spawn a background subagent to handle a specific task. The subagent runs in an isolated session with its own tool set. " + - "When it completes, its findings are delivered directly into your context automatically. " + - "After spawning, do NOT proceed with work that depends on the results — but you can still chat or do unrelated tasks. " + - "When spawning multiple subagents for a collect-then-act workflow, ALWAYS use the `next` parameter " + - "on the first spawn to define follow-up work, then pass the returned groupId to subsequent spawns. " + - "Use this for parallelizable work, long-running analysis, or tasks that benefit from isolation.", - parameters: SessionsSpawnSchema, - execute: async (_toolCallId, args) => { - const { task, label, model, cleanup = "delete", timeoutSeconds, announce, next } = args as SessionsSpawnArgs; - let { groupId } = args as SessionsSpawnArgs; - - // Guard: subagents cannot spawn subagents - if (options.isSubagent) { - return { - content: [{ type: "text", text: "Error: sessions_spawn is not allowed from sub-agent sessions." }], - details: { - status: "error", - error: "sessions_spawn is not allowed from sub-agent sessions", - }, - }; - } - - const requesterSessionId = options.sessionId ?? "unknown"; - const runId = uuidv7(); - const childSessionId = uuidv7(); - - // Auto-create group when groupId is provided but doesn't exist yet, - // or when `next` is provided without a groupId. - if (groupId) { - const existingGroup = getSubagentGroup(groupId); - if (!existingGroup) { - // LLM provided a custom groupId — auto-create the group - createSubagentGroup({ - groupId, - requesterSessionId, - label: label ? `Group: ${label}` : undefined, - next, - }); - } - } else if (next) { - groupId = uuidv7(); - createSubagentGroup({ - groupId, - requesterSessionId, - label: label ? `Group: ${label}` : undefined, - next, - }); - } - - // Resolve tools for the subagent (with isSubagent=true for policy filtering) - const subagentTools = resolveTools({ isSubagent: true }); - const toolNames = subagentTools.map((t) => t.name); - - // Build system prompt for the child - const systemPrompt = buildSubagentSystemPrompt({ - requesterSessionId, - childSessionId, - label, - task, - tools: toolNames, - }); - - // Spawn child agent via Hub - try { - const hub = getHub(); - const childAgent = hub.createSubagent(childSessionId, { - systemPrompt, - model, - provider: options.provider, - }); - - // Register the run for lifecycle tracking. - // The write is deferred via the start callback so the child only - // begins work once a concurrency slot is available in the queue. - registerSubagentRun({ - runId, - childSessionId, - requesterSessionId, - task, - label, - cleanup, - timeoutSeconds, - announce: groupId ? "silent" : announce, - groupId, - start: () => childAgent.write(task), - }); - - // Build response text - const groupInfo = groupId ? `\nGroup: ${groupId}` : ""; - const nextInfo = next ? `\nContinuation: "${next.slice(0, 100)}${next.length > 100 ? "…" : ""}"` : ""; - const responseText = - `Subagent spawned: ${label || task.slice(0, 80)}\n` + - `Run: ${runId}${groupInfo}${nextInfo}\n\n` + - `⏳ WAITING FOR RESULTS — do NOT proceed with work that depends on these results.\n` + - `Do NOT fabricate data or completion status. Results will arrive in your context automatically.`; - - return { - content: [{ type: "text", text: responseText }], - details: { - status: "accepted", - childSessionId, - runId, - groupId, - }, - }; - } catch (err) { - const message = err instanceof Error ? err.message : String(err); - return { - content: [{ type: "text", text: `Error spawning subagent: ${message}` }], - details: { - status: "error", - error: message, - }, - }; - } - }, - }; -} diff --git a/packages/ui/src/components/tool-call-item.tsx b/packages/ui/src/components/tool-call-item.tsx index 2936714d..51b5279c 100644 --- a/packages/ui/src/components/tool-call-item.tsx +++ b/packages/ui/src/components/tool-call-item.tsx @@ -39,7 +39,7 @@ const TOOL_DISPLAY: Record = { memory_set: { label: "MemorySet", icon: Database }, memory_delete: { label: "MemoryDelete", icon: Database }, memory_list: { label: "MemoryList", icon: Database }, - sessions_spawn: { label: "SpawnSession", icon: GitBranch }, + delegate: { label: "Delegate", icon: GitBranch }, data: { label: "Data", icon: BarChart3 }, } @@ -81,11 +81,12 @@ function getSubtitle(toolName: string, args?: Record): string { const ticker = params?.ticker ? String(params.ticker).toUpperCase() : "" return ticker ? `${action} ${ticker}` : action } - case "sessions_spawn": { - const label = args.label ? String(args.label) : "" - if (label) return label.length > 60 ? label.slice(0, 57) + "…" : label - const task = String(args.task ?? "") - return task.length > 60 ? task.slice(0, 57) + "…" : task + case "delegate": { + const tasks = args.tasks as Array<{ label?: string; task?: string }> | undefined + if (!tasks?.length) return "" + const labels = tasks.map((t, i) => t.label || `Task ${i + 1}`) + const summary = labels.join(", ") + return summary.length > 60 ? summary.slice(0, 57) + "…" : summary } default: return "" @@ -106,7 +107,7 @@ const RUNNING_LABELS: Record = { web_search: "searching…", web_fetch: "fetching…", data: "fetching…", - sessions_spawn: "spawning…", + delegate: "delegating…", } /** Stats derived from tool result content */ diff --git a/scripts/swe-bench/run.ts b/scripts/swe-bench/run.ts index 5e6c4b40..e35efa72 100644 --- a/scripts/swe-bench/run.ts +++ b/scripts/swe-bench/run.ts @@ -217,7 +217,7 @@ async function runTask( enableSkills: false, tools: { // Only allow coding tools — no web, no cron, no sessions - deny: ["web_fetch", "web_search", "cron", "data", "sessions_spawn", "sessions_list", "memory_search", "send_file"], + deny: ["web_fetch", "web_search", "cron", "data", "delegate", "memory_search", "send_file"], }, };