diff --git a/src/agent/cli/output.test.ts b/src/agent/cli/output.test.ts index 3a6f6030..f292dd0e 100644 --- a/src/agent/cli/output.test.ts +++ b/src/agent/cli/output.test.ts @@ -112,6 +112,14 @@ describe("output", () => { expect(extractResultDetails(result)).toEqual(result); }); + it("should prefer details when present", () => { + const result = { + content: [{ type: "text", text: "not json" }], + details: { count: 3, truncated: false }, + }; + expect(extractResultDetails(result)).toEqual({ count: 3, truncated: false }); + }); + it("should return direct object if no content array", () => { const result = { count: 10, truncated: true }; expect(extractResultDetails(result)).toEqual({ count: 10, truncated: true }); diff --git a/src/agent/cli/output.ts b/src/agent/cli/output.ts index ba3d465e..b2f77cfa 100644 --- a/src/agent/cli/output.ts +++ b/src/agent/cli/output.ts @@ -109,6 +109,11 @@ export function extractResultDetails(result: unknown): Record | } } + const withDetails = result as { details?: unknown }; + if (withDetails.details && typeof withDetails.details === "object") { + return withDetails.details as Record; + } + // Try direct object access return result as Record; } @@ -243,8 +248,18 @@ export function createAgentOutput(params: { } case "tool_execution_end": { // Stop spinner and show final result with summary - if (event.isError) { - const errorText = extractText(event.result) || "Tool failed"; + const details = extractResultDetails(event.result); + const errorField = details?.error; + const hasError = + event.isError || + Boolean(errorField) || + details?.success === false; + if (hasError) { + const errorText = + (typeof details?.message === "string" && details.message) || + (typeof errorField === "string" && errorField) || + extractText(event.result) || + "Tool failed"; const bullet = colors.toolError("✗"); const title = colors.toolName(toolDisplayName(event.toolName)); spinner.stop(`${bullet} ${title}: ${colors.toolError(errorText)}`); diff --git a/src/agent/tools.ts b/src/agent/tools.ts index a7c150bd..8190cb86 100644 --- a/src/agent/tools.ts +++ b/src/agent/tools.ts @@ -1,12 +1,13 @@ import type { AgentOptions } from "./types.js"; import { createCodingTools } from "@mariozechner/pi-coding-agent"; -import type { AgentTool } from "@mariozechner/pi-agent-core"; +import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core"; import { createExecTool } from "./tools/exec.js"; import { createProcessTool } from "./tools/process.js"; import { createGlobTool } from "./tools/glob.js"; import { createWebFetchTool, createWebSearchTool } from "./tools/web/index.js"; import { createMemoryTools } from "./tools/memory/index.js"; import { filterTools } from "./tools/policy.js"; +import { isMulticaError, isRetryableError } from "../shared/errors.js"; // Re-export resolveModel from providers for backwards compatibility export { resolveModel } from "./providers/index.js"; @@ -20,6 +21,66 @@ export interface CreateToolsOptions { profileBaseDir?: string | undefined; } +type ToolErrorPayload = { + error: true; + message: string; + name?: string; + code?: string; + retryable?: boolean; + details?: Record; +}; + +function toToolErrorPayload(error: unknown): ToolErrorPayload { + if (isMulticaError(error)) { + return { + error: true, + message: error.message, + name: error.name, + code: error.code, + retryable: error.retryable, + details: error.details, + }; + } + + if (error instanceof Error) { + return { + error: true, + message: error.message, + name: error.name, + retryable: isRetryableError(error), + }; + } + + return { + error: true, + message: String(error), + }; +} + +function toolErrorResult(error: unknown): AgentToolResult { + const payload = toToolErrorPayload(error); + return { + content: [{ type: "text", text: JSON.stringify(payload, null, 2) }], + details: payload, + }; +} + +function wrapTool( + tool: AgentTool, +): AgentTool { + const execute = tool.execute; + return { + ...tool, + execute: async (...args) => { + try { + return await execute(...args); + } catch (error) { + return toolErrorResult(error) as AgentToolResult; + } + }, + }; +} + /** * Create all available tools. * This returns the full set before policy filtering. @@ -86,7 +147,7 @@ export function resolveTools(options: AgentOptions): AgentTool[] { isSubagent: options.isSubagent, }); - return filtered; + return filtered.map((tool) => wrapTool(tool)); } /** diff --git a/src/agent/tools/policy.test.ts b/src/agent/tools/policy.test.ts index 1862fd6f..e0902708 100644 --- a/src/agent/tools/policy.test.ts +++ b/src/agent/tools/policy.test.ts @@ -1,32 +1,6 @@ -/** - * Tests for tool policy system. - * Run with: npx tsx src/agent/tools/policy.test.ts - */ - -import { filterTools, type ToolsConfig } from "./policy.js"; -import { TOOL_GROUPS, TOOL_PROFILES, expandToolGroups } from "./groups.js"; - -// Simple test helper -function test(name: string, fn: () => void) { - try { - fn(); - console.log(`✓ ${name}`); - } catch (e) { - console.error(`✗ ${name}`); - console.error(e); - process.exit(1); - } -} - -function assertEqual(actual: T, expected: T, msg?: string) { - const actualStr = JSON.stringify(actual); - const expectedStr = JSON.stringify(expected); - if (actualStr !== expectedStr) { - throw new Error( - `${msg || "Assertion failed"}\n Expected: ${expectedStr}\n Actual: ${actualStr}`, - ); - } -} +import { describe, it, expect } from "vitest"; +import { filterTools } from "./policy.js"; +import { TOOL_PROFILES, expandToolGroups } from "./groups.js"; // Mock tools for testing const mockTools = [ @@ -40,177 +14,171 @@ const mockTools = [ { name: "web_search" }, ] as any[]; -console.log("=== Tool Groups Tests ===\n"); - -test("expandToolGroups: group:fs", () => { - const expanded = expandToolGroups(["group:fs"]); - assertEqual(expanded.sort(), ["edit", "glob", "read", "write"]); -}); - -test("expandToolGroups: group:runtime", () => { - const expanded = expandToolGroups(["group:runtime"]); - assertEqual(expanded.sort(), ["exec", "process"]); -}); - -test("expandToolGroups: group:web", () => { - const expanded = expandToolGroups(["group:web"]); - assertEqual(expanded.sort(), ["web_fetch", "web_search"]); -}); - -test("expandToolGroups: mixed groups and tools", () => { - const expanded = expandToolGroups(["group:runtime", "web_fetch"]); - assertEqual(expanded.sort(), ["exec", "process", "web_fetch"]); -}); - -console.log("\n=== Tool Profiles Tests ===\n"); - -test("TOOL_PROFILES: minimal has empty allow", () => { - assertEqual(TOOL_PROFILES.minimal.allow, []); -}); - -test("TOOL_PROFILES: coding has fs and runtime", () => { - assertEqual(TOOL_PROFILES.coding.allow, ["group:fs", "group:runtime"]); -}); - -test("TOOL_PROFILES: full has no restrictions", () => { - assertEqual(TOOL_PROFILES.full.allow, undefined); - assertEqual(TOOL_PROFILES.full.deny, undefined); -}); - -console.log("\n=== Filter Tests ===\n"); - -test("filterTools: no config returns all tools", () => { - const filtered = filterTools(mockTools, {}); - assertEqual(filtered.length, mockTools.length); -}); - -test("filterTools: minimal profile returns no tools", () => { - const filtered = filterTools(mockTools, { config: { profile: "minimal" } }); - assertEqual(filtered.length, 0); -}); - -test("filterTools: coding profile returns fs and runtime", () => { - const filtered = filterTools(mockTools, { config: { profile: "coding" } }); - const names = filtered.map((t) => t.name).sort(); - assertEqual(names, ["edit", "exec", "glob", "process", "read", "write"]); -}); - -test("filterTools: web profile returns all", () => { - const filtered = filterTools(mockTools, { config: { profile: "web" } }); - const names = filtered.map((t) => t.name).sort(); - assertEqual(names, [ - "edit", - "exec", - "glob", - "process", - "read", - "web_fetch", - "web_search", - "write", - ]); -}); - -test("filterTools: full profile returns all tools", () => { - const filtered = filterTools(mockTools, { config: { profile: "full" } }); - assertEqual(filtered.length, mockTools.length); -}); - -test("filterTools: deny specific tool", () => { - const filtered = filterTools(mockTools, { config: { deny: ["exec"] } }); - const names = filtered.map((t) => t.name); - assertEqual(names.includes("exec"), false); - assertEqual(names.length, mockTools.length - 1); -}); - -test("filterTools: allow specific tools", () => { - const filtered = filterTools(mockTools, { - config: { allow: ["read", "write"] }, +describe("tool groups", () => { + it("expandToolGroups: group:fs", () => { + const expanded = expandToolGroups(["group:fs"]); + expect(expanded.sort()).toEqual(["edit", "glob", "read", "write"]); }); - const names = filtered.map((t) => t.name).sort(); - assertEqual(names, ["read", "write"]); -}); -test("filterTools: deny takes precedence over allow", () => { - const filtered = filterTools(mockTools, { - config: { allow: ["read", "write", "exec"], deny: ["exec"] }, + it("expandToolGroups: group:runtime", () => { + const expanded = expandToolGroups(["group:runtime"]); + expect(expanded.sort()).toEqual(["exec", "process"]); + }); + + it("expandToolGroups: group:web", () => { + const expanded = expandToolGroups(["group:web"]); + expect(expanded.sort()).toEqual(["web_fetch", "web_search"]); + }); + + it("expandToolGroups: mixed groups and tools", () => { + const expanded = expandToolGroups(["group:runtime", "web_fetch"]); + expect(expanded.sort()).toEqual(["exec", "process", "web_fetch"]); }); - const names = filtered.map((t) => t.name).sort(); - assertEqual(names, ["read", "write"]); }); -console.log("\n=== Provider-specific Tests ===\n"); +describe("tool profiles", () => { + it("minimal has empty allow", () => { + expect(TOOL_PROFILES.minimal.allow).toEqual([]); + }); -test("filterTools: provider-specific deny", () => { - const filtered = filterTools(mockTools, { - config: { - byProvider: { - google: { deny: ["exec", "process"] }, + it("coding has fs and runtime", () => { + expect(TOOL_PROFILES.coding.allow).toEqual(["group:fs", "group:runtime"]); + }); + + it("full has no restrictions", () => { + expect(TOOL_PROFILES.full.allow).toBeUndefined(); + expect(TOOL_PROFILES.full.deny).toBeUndefined(); + }); +}); + +describe("filterTools", () => { + it("no config returns all tools", () => { + const filtered = filterTools(mockTools, {}); + expect(filtered.length).toBe(mockTools.length); + }); + + it("minimal profile returns no tools", () => { + const filtered = filterTools(mockTools, { config: { profile: "minimal" } }); + expect(filtered.length).toBe(0); + }); + + it("coding profile returns fs and runtime", () => { + const filtered = filterTools(mockTools, { config: { profile: "coding" } }); + const names = filtered.map((t) => t.name).sort(); + expect(names).toEqual(["edit", "exec", "glob", "process", "read", "write"]); + }); + + it("web profile returns all", () => { + const filtered = filterTools(mockTools, { config: { profile: "web" } }); + const names = filtered.map((t) => t.name).sort(); + expect(names).toEqual([ + "edit", + "exec", + "glob", + "process", + "read", + "web_fetch", + "web_search", + "write", + ]); + }); + + it("full profile returns all tools", () => { + const filtered = filterTools(mockTools, { config: { profile: "full" } }); + expect(filtered.length).toBe(mockTools.length); + }); + + it("deny specific tool", () => { + const filtered = filterTools(mockTools, { config: { deny: ["exec"] } }); + const names = filtered.map((t) => t.name); + expect(names.includes("exec")).toBe(false); + expect(names.length).toBe(mockTools.length - 1); + }); + + it("allow specific tools", () => { + const filtered = filterTools(mockTools, { + config: { allow: ["read", "write"] }, + }); + const names = filtered.map((t) => t.name).sort(); + expect(names).toEqual(["read", "write"]); + }); + + it("deny takes precedence over allow", () => { + const filtered = filterTools(mockTools, { + config: { allow: ["read", "write", "exec"], deny: ["exec"] }, + }); + const names = filtered.map((t) => t.name).sort(); + expect(names).toEqual(["read", "write"]); + }); +}); + +describe("provider-specific filtering", () => { + it("provider-specific deny", () => { + const filtered = filterTools(mockTools, { + config: { + byProvider: { + google: { deny: ["exec", "process"] }, + }, }, - }, - provider: "google", + provider: "google", + }); + const names = filtered.map((t) => t.name); + expect(names.includes("exec")).toBe(false); + expect(names.includes("process")).toBe(false); + expect(names.length).toBe(mockTools.length - 2); }); - const names = filtered.map((t) => t.name); - assertEqual(names.includes("exec"), false); - assertEqual(names.includes("process"), false); - assertEqual(names.length, mockTools.length - 2); -}); -test("filterTools: provider not matching does not apply", () => { - const filtered = filterTools(mockTools, { - config: { - byProvider: { - google: { deny: ["exec", "process"] }, + it("provider not matching does not apply", () => { + const filtered = filterTools(mockTools, { + config: { + byProvider: { + google: { deny: ["exec", "process"] }, + }, }, - }, - provider: "openai", + provider: "openai", + }); + expect(filtered.length).toBe(mockTools.length); }); - assertEqual(filtered.length, mockTools.length); }); -console.log("\n=== Subagent Tests ===\n"); - -test("filterTools: subagent restrictions apply", () => { - // Currently DEFAULT_SUBAGENT_TOOL_DENY is empty, so no tools are denied - const filtered = filterTools(mockTools, { isSubagent: true }); - // With empty deny list, all tools are allowed - assertEqual(filtered.length, mockTools.length); -}); - -console.log("\n=== Combined Tests ===\n"); - -test("filterTools: profile + deny", () => { - const filtered = filterTools(mockTools, { - config: { - profile: "coding", - deny: ["exec"], - }, +describe("subagent restrictions", () => { + it("subagent restrictions apply", () => { + const filtered = filterTools(mockTools, { isSubagent: true }); + expect(filtered.length).toBe(mockTools.length); }); - const names = filtered.map((t) => t.name).sort(); - // coding = fs + runtime, minus exec - assertEqual(names, ["edit", "glob", "process", "read", "write"]); }); -test("filterTools: profile + provider deny", () => { - const filtered = filterTools(mockTools, { - config: { - profile: "web", - byProvider: { - google: { deny: ["exec"] }, +describe("combined filtering", () => { + it("profile + deny", () => { + const filtered = filterTools(mockTools, { + config: { + profile: "coding", + deny: ["exec"], }, - }, - provider: "google", + }); + const names = filtered.map((t) => t.name).sort(); + expect(names).toEqual(["edit", "glob", "process", "read", "write"]); }); - const names = filtered.map((t) => t.name).sort(); - // web profile - exec - assertEqual(names, [ - "edit", - "glob", - "process", - "read", - "web_fetch", - "web_search", - "write", - ]); -}); -console.log("\n=== All tests passed! ===\n"); + it("profile + provider deny", () => { + const filtered = filterTools(mockTools, { + config: { + profile: "web", + byProvider: { + google: { deny: ["exec"] }, + }, + }, + provider: "google", + }); + const names = filtered.map((t) => t.name).sort(); + expect(names).toEqual([ + "edit", + "glob", + "process", + "read", + "web_fetch", + "web_search", + "write", + ]); + }); +});