diff --git a/packages/core/src/agent/tools.ts b/packages/core/src/agent/tools.ts index 551c8245..fd9bf10a 100644 --- a/packages/core/src/agent/tools.ts +++ b/packages/core/src/agent/tools.ts @@ -14,6 +14,7 @@ import { createDataTool } from "./tools/data/index.js"; import { createSendFileTool } from "./tools/send-file.js"; import type { SendFileCallback } from "./tools/send-file.js"; import { filterTools } from "./tools/policy.js"; +import { wrapReadToolWithImageResize } from "./tools/image-resize.js"; import { isMulticaError, isRetryableError } from "@multica/utils"; import type { ExecApprovalCallback } from "./tools/exec-approval-types.js"; @@ -106,9 +107,9 @@ export function createAllTools(options: CreateToolsOptions | string): AgentTool< const opts: CreateToolsOptions = typeof options === "string" ? { cwd: options } : options; const { cwd, profileDir, isSubagent, sessionId } = opts; - const baseTools = createCodingTools(cwd).filter( - (tool) => tool.name !== "bash", - ) as AgentTool[]; + const baseTools = createCodingTools(cwd) + .filter((tool) => tool.name !== "bash") + .map((tool) => tool.name === "read" ? 
wrapReadToolWithImageResize(tool) : tool) as AgentTool[]; const execTool = createExecTool(cwd, opts.onExecApprovalNeeded); const processTool = createProcessTool(cwd); diff --git a/packages/core/src/agent/tools/image-resize.test.ts b/packages/core/src/agent/tools/image-resize.test.ts new file mode 100644 index 00000000..a287b671 --- /dev/null +++ b/packages/core/src/agent/tools/image-resize.test.ts @@ -0,0 +1,56 @@ +import { describe, it, expect } from "vitest"; +import { wrapReadToolWithImageResize } from "./image-resize.js"; + +describe("image-resize", () => { + function makeMockReadTool(content: any[]) { + return { + name: "read", + description: "test", + parameters: {} as any, + execute: async () => ({ content }), + }; + } + + it("should pass through non-image content unchanged", async () => { + const tool = makeMockReadTool([ + { type: "text", text: "Hello world" }, + ]); + const wrapped = wrapReadToolWithImageResize(tool as any); + const result = await wrapped.execute({} as any) as any; + expect(result.content).toHaveLength(1); + expect(result.content[0].text).toBe("Hello world"); + }); + + it("should pass through small images unchanged", async () => { + const smallBase64 = Buffer.alloc(100, 0x41).toString("base64"); + const tool = makeMockReadTool([ + { type: "image", data: smallBase64 }, + ]); + const wrapped = wrapReadToolWithImageResize(tool as any); + const result = await wrapped.execute({} as any) as any; + expect(result.content[0].data).toBe(smallBase64); + }); + + it("should pass through results without content arrays", async () => { + const tool = { + name: "read", + description: "test", + parameters: {} as any, + execute: async () => ({ text: "plain" }), + }; + const wrapped = wrapReadToolWithImageResize(tool as any); + const result = await wrapped.execute({} as any) as any; + expect(result.text).toBe("plain"); + }); + + it("should handle execution errors gracefully", async () => { + const tool = { + name: "read", + description: "test", + parameters: 
{} as any, + execute: async () => { throw new Error("file not found"); }, + }; + const wrapped = wrapReadToolWithImageResize(tool as any); + await expect(wrapped.execute({} as any)).rejects.toThrow("file not found"); + }); +}); diff --git a/packages/core/src/agent/tools/image-resize.ts b/packages/core/src/agent/tools/image-resize.ts new file mode 100644 index 00000000..30032ea8 --- /dev/null +++ b/packages/core/src/agent/tools/image-resize.ts @@ -0,0 +1,211 @@ +/** + * Image resize wrapper for the read tool. + * + * Wraps the read tool from pi-coding-agent to automatically downscale + * oversized images returned in tool results. Uses macOS `sips` for resize + * (no extra dependencies required). + */ + +import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core"; +import { execFile } from "node:child_process"; +import { writeFile, readFile, mkdtemp, rm } from "node:fs/promises"; +import { join } from "node:path"; +import { tmpdir } from "node:os"; + +/** Max decoded image binary size (1MB) */ +const MAX_IMAGE_BYTES = 1 * 1024 * 1024; + +/** Max image dimension in pixels per side */ +const MAX_IMAGE_DIMENSION_PX = 2000; + +/** JPEG quality for resized output */ +const JPEG_QUALITY = 80; + +type ContentBlock = AgentToolResult["content"][number]; + +function isImageBlock(block: unknown): block is { type: "image"; data: string; [key: string]: unknown } { + return ( + !!block && + typeof block === "object" && + (block as any).type === "image" && + typeof (block as any).data === "string" + ); +} + +/** + * Run sips command and return output buffer. + * Only available on macOS. + */ +function runSips(args: string[]): Promise { + return new Promise((resolve, reject) => { + execFile("/usr/bin/sips", args, { timeout: 20_000, maxBuffer: 1024 * 1024 }, (err, stdout) => { + if (err) reject(err); + else resolve(stdout); + }); + }); +} + +/** + * Get image dimensions via sips. 
/**
 * Read an image's pixel dimensions with `sips`.
 *
 * `sips` works on files, so the bytes are first written to `in.img` inside
 * `tmpDir`. A missing or failing `sips` is not treated as an error: the
 * function resolves to `null` and leaves the decision to the caller.
 *
 * @param buffer - Decoded image bytes.
 * @param tmpDir - Existing scratch directory owned by the caller.
 * @returns Width and height in pixels, or `null` when `sips` fails or its
 *   output does not contain both `pixelWidth` and `pixelHeight`.
 */
async function getImageDimensions(
  buffer: Buffer,
  tmpDir: string,
): Promise<{ width: number; height: number } | null> {
  const input = join(tmpDir, "in.img");
  await writeFile(input, buffer);

  try {
    const stdout = await runSips(["-g", "pixelWidth", "-g", "pixelHeight", input]);
    const widthMatch = stdout.match(/pixelWidth:\s*(\d+)/);
    const heightMatch = stdout.match(/pixelHeight:\s*(\d+)/);
    if (widthMatch?.[1] && heightMatch?.[1]) {
      return {
        width: Number.parseInt(widthMatch[1], 10),
        height: Number.parseInt(heightMatch[1], 10),
      };
    }
  } catch {
    // sips not available or failed
  }
  return null;
}

/**
 * Re-encode an image as JPEG with `sips`, bounding its longest side.
 *
 * Writes the input to `in.img` and reads the result back from `out.jpg`, both
 * inside `tmpDir`.
 *
 * @param buffer - Decoded image bytes.
 * @param maxSide - Value passed to `sips -Z` (maximum width/height in pixels).
 * @param quality - Value passed as the JPEG `formatOptions`.
 * @param tmpDir - Existing scratch directory owned by the caller.
 * @returns The bytes of the JPEG written by `sips`.
 * @throws If writing the input, running `sips`, or reading the output fails.
 */
async function resizeWithSips(
  buffer: Buffer,
  maxSide: number,
  quality: number,
  tmpDir: string,
): Promise<Buffer> {
  const input = join(tmpDir, "in.img");
  const output = join(tmpDir, "out.jpg");
  await writeFile(input, buffer);

  await runSips([
    "-Z", String(maxSide),
    "-s", "format", "jpeg",
    "-s", "formatOptions", String(quality),
    input,
    "--out", output,
  ]);

  return readFile(output);
}

/** Longest-side targets (px) tried, in order, when the first resize is still too large. */
const FALLBACK_SIDES_PX: readonly number[] = [1600, 1200, 800];

/**
 * Check whether an image needs resizing and perform it if necessary.
 *
 * Resizing is only attempted on macOS (it relies on `sips`). An image is
 * resized when its decoded size exceeds `MAX_IMAGE_BYTES`, when either side
 * exceeds `MAX_IMAGE_DIMENSION_PX`, or when its dimensions cannot be read.
 * If the first JPEG is still over the byte limit, progressively smaller
 * targets are tried; the smallest result produced is returned even when none
 * of them fits, so the caller always gets the best attempt.
 *
 * @param base64Data - Base64-encoded image bytes.
 * @returns `resized: false` with the original data when nothing was done
 *   (non-macOS, already within limits, or `sips` failed); otherwise
 *   `resized: true` with the JPEG data and `mimeType: "image/jpeg"`.
 */
async function maybeResizeImage(
  base64Data: string,
): Promise<{ base64: string; mimeType?: string; resized: boolean }> {
  const passThrough = { base64: base64Data, resized: false };

  // Non-macOS: pass through (future: add sharp support). Checked before
  // decoding so other platforms never pay for the base64 decode.
  if (process.platform !== "darwin") {
    return passThrough;
  }

  const buffer = Buffer.from(base64Data, "base64");
  const overSize = buffer.byteLength > MAX_IMAGE_BYTES;

  const tmpDir = await mkdtemp(join(tmpdir(), "multica-img-"));
  try {
    const dims = await getImageDimensions(buffer, tmpDir);

    // Dimensions known and everything within limits: nothing to do.
    if (
      dims &&
      !overSize &&
      dims.width <= MAX_IMAGE_DIMENSION_PX &&
      dims.height <= MAX_IMAGE_DIMENSION_PX
    ) {
      return passThrough;
    }

    const longestSide = dims ? Math.max(dims.width, dims.height) : MAX_IMAGE_DIMENSION_PX;
    const targetSide = Math.min(MAX_IMAGE_DIMENSION_PX, longestSide);
    let best = await resizeWithSips(buffer, targetSide, JPEG_QUALITY, tmpDir);

    for (const side of FALLBACK_SIDES_PX) {
      if (best.byteLength <= MAX_IMAGE_BYTES) break;
      // A target that is not smaller than the one already tried cannot shrink the image.
      if (side >= targetSide) continue;
      const candidate = await resizeWithSips(buffer, side, JPEG_QUALITY, tmpDir);
      if (candidate.byteLength < best.byteLength) best = candidate;
    }

    return { base64: best.toString("base64"), mimeType: "image/jpeg", resized: true };
  } catch {
    // sips failed, pass through original (resizing is best-effort)
    return passThrough;
  } finally {
    await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
  }
}

/**
 * Build the replacement for an image block after a successful resize.
 *
 * Copies every field of `block`, swaps in the new base64 payload and, when the
 * resize reported a media type, records it so the block no longer advertises
 * the pre-resize format.
 * NOTE(review): assumes image blocks carry their media type in `mimeType`
 * (the field name `maybeResizeImage` reports) — confirm against the content type.
 */
function applyResizeOutcome<T extends { data: string }>(
  block: T,
  outcome: { base64: string; mimeType?: string },
): T {
  return {
    ...block,
    data: outcome.base64,
    ...(outcome.mimeType ? { mimeType: outcome.mimeType } : {}),
  };
}

/**
 * Process tool result content blocks, resizing oversized images.
 *
 * Non-image blocks and images whose decoded size is at most `MAX_IMAGE_BYTES`
 * are kept as they are. Oversized images are passed to `maybeResizeImage`; a
 * block is replaced only when a resize actually happened.
 *
 * @param content - Content blocks of a tool result.
 * @returns A new array when at least one block was replaced; otherwise the
 *   very same `content` array, so callers can detect "nothing changed" by
 *   identity.
 */
async function processResultContent(content: ContentBlock[]): Promise<ContentBlock[]> {
  const processed: ContentBlock[] = [];
  let changed = false;

  for (const block of content) {
    // Buffer.byteLength computes the decoded size without allocating the
    // decoded bytes (it assumes well-formed base64 and may over-estimate
    // otherwise, which at worst triggers a harmless resize attempt).
    if (!isImageBlock(block) || Buffer.byteLength(block.data, "base64") <= MAX_IMAGE_BYTES) {
      processed.push(block);
      continue;
    }

    try {
      const outcome = await maybeResizeImage(block.data);
      if (outcome.resized) {
        processed.push(applyResizeOutcome(block, outcome) as ContentBlock);
        changed = true;
        continue;
      }
    } catch {
      // Resizing is best-effort: fall through and keep the original block.
    }
    processed.push(block);
  }

  return changed ? processed : content;
}

/**
 * Wrap the read tool to automatically resize oversized images in results.
 *
 * The returned tool copies every property of `tool` and replaces `execute`:
 * the original `execute` is awaited (its rejections propagate unchanged) and,
 * if the result has a `content` array, oversized image blocks are downscaled.
 *
 * @param tool - The tool to wrap.
 * @returns A tool with the same properties whose results are either the
 *   original result object (nothing resized, or no `content` array) or a
 *   shallow copy of it with the processed `content`.
 */
export function wrapReadToolWithImageResize(
  tool: AgentTool,
): AgentTool {
  const originalExecute = tool.execute;

  return {
    ...tool,
    execute: async (...args: Parameters<typeof originalExecute>) => {
      const result = await originalExecute(...args);

      // Only process results with content arrays
      const content: unknown = (result as { content?: unknown } | null | undefined)?.content;
      if (!Array.isArray(content)) {
        return result;
      }

      const processed = await processResultContent(content as ContentBlock[]);
      if (processed === content) return result;

      return { ...result, content: processed } as typeof result;
    },
  };
}