Merge pull request #165 from multica-ai/forrestchang/analyze-run-logs
feat(session): optimize image storage with externalization and resize
This commit is contained in:
commit
fe6fdd4ddc
5 changed files with 723 additions and 7 deletions
|
|
@ -6,6 +6,7 @@ import {
|
|||
resolveBaseDir,
|
||||
resolveSessionDir,
|
||||
resolveSessionPath,
|
||||
resolveMediaDir,
|
||||
ensureSessionDir,
|
||||
readEntries,
|
||||
appendEntry,
|
||||
|
|
@ -274,4 +275,245 @@ describe("session/storage", () => {
|
|||
expect(readFileSync(filePath, "utf8")).toBe("");
|
||||
});
|
||||
});
|
||||
|
||||
describe("image externalization", () => {
  // 50 KB of binary encodes to ~66.7K base64 characters, which is above the
  // storage layer's externalization threshold (MIN_EXTERNALIZE_B64_LENGTH).
  const largeBuffer = Buffer.alloc(50_000, 0x42); // every byte is 'B'
  const largeBase64 = largeBuffer.toString("base64");

  // 100 bytes of 'A' is far below the threshold and must stay inline.
  const smallBase64 = Buffer.alloc(100, 0x41).toString("base64");

  /** Relative path format expected for every externalized image reference. */
  const mediaRefPattern = /^media\/[a-f0-9]+\.bin$/;

  /** Wrap content blocks in a user message entry stamped with the current time. */
  function makeUserEntry(content: unknown[]): SessionEntry {
    return {
      type: "message",
      message: { role: "user", content } as any,
      timestamp: Date.now(),
    };
  }

  /** Format A entry: a text block followed by `{ type: "image", data }`. */
  function makeImageEntry(imageData: string): SessionEntry {
    return makeUserEntry([
      { type: "text", text: "Read image file [image/png]" },
      { type: "image", data: imageData },
    ]);
  }

  /** Format B entry: `{ type: "image", source: { type: "base64", data } }`. */
  function makeFormatBEntry(imageData: string): SessionEntry {
    return makeUserEntry([
      { type: "image", source: { type: "base64", data: imageData } },
    ]);
  }

  /** Entry whose image is nested inside a tool_result block's content array. */
  function makeToolResultEntry(imageData: string): SessionEntry {
    return makeUserEntry([
      {
        type: "tool_result",
        tool_use_id: "test-id",
        content: [
          { type: "text", text: "Read image file [image/png]" },
          { type: "image", data: imageData },
        ],
      },
    ]);
  }

  // NOTE: the helpers below read `testBaseDir` at call time (inside a test),
  // never at describe-collection time.

  /** Raw text of the session's JSONL file, bypassing readEntries. */
  function readRawJsonl(sessionId: string): string {
    return readFileSync(join(testBaseDir, sessionId, "session.jsonl"), "utf8");
  }

  /** Parse the single raw JSONL line of a session without internalizing images. */
  function readRawEntry(sessionId: string): any {
    return JSON.parse(readRawJsonl(sessionId).trim());
  }

  /** File names inside the session's media dir, or [] when the dir is absent. */
  function listMediaFiles(sessionId: string): string[] {
    const mediaDir = resolveMediaDir(sessionId, { baseDir: testBaseDir });
    return existsSync(mediaDir)
      ? (require("node:fs").readdirSync(mediaDir) as string[])
      : [];
  }

  it("should externalize Format A image and create media file", async () => {
    const sessionId = "ext-format-a";

    await appendEntry(sessionId, makeImageEntry(largeBase64), { baseDir: testBaseDir });

    // Raw JSONL should carry $ref, not data
    const imageBlock = readRawEntry(sessionId).message.content[1];
    expect(imageBlock.$ref).toMatch(mediaRefPattern);
    expect(imageBlock.data).toBeUndefined();

    // Exactly one media file should exist
    const files = listMediaFiles(sessionId);
    expect(files).toHaveLength(1);
    expect(files[0]).toMatch(/^[a-f0-9]+\.bin$/);

    // Binary content should match the original bytes
    const mediaDir = resolveMediaDir(sessionId, { baseDir: testBaseDir });
    const savedBuffer = readFileSync(join(mediaDir, files[0]!));
    expect(savedBuffer).toEqual(largeBuffer);
  });

  it("should externalize Format B image (Anthropic source style)", async () => {
    const sessionId = "ext-format-b";

    await appendEntry(sessionId, makeFormatBEntry(largeBase64), { baseDir: testBaseDir });

    const source = readRawEntry(sessionId).message.content[0].source;
    expect(source.type).toBe("$ref");
    expect(source.path).toMatch(mediaRefPattern);
  });

  it("should restore externalized images on read (round-trip)", async () => {
    const sessionId = "ext-roundtrip";

    await appendEntry(sessionId, makeImageEntry(largeBase64), { baseDir: testBaseDir });

    const entries = readEntries(sessionId, { baseDir: testBaseDir });
    expect(entries).toHaveLength(1);
    const content = (entries[0] as any).message.content;
    expect(content[1].type).toBe("image");
    expect(content[1].data).toBe(largeBase64);
    expect(content[1].$ref).toBeUndefined();
  });

  it("should restore Format B images on read", async () => {
    const sessionId = "ext-roundtrip-b";

    await appendEntry(sessionId, makeFormatBEntry(largeBase64), { baseDir: testBaseDir });

    const entries = readEntries(sessionId, { baseDir: testBaseDir });
    expect(entries).toHaveLength(1);
    const block = (entries[0] as any).message.content[0];
    expect(block.source.type).toBe("base64");
    expect(block.source.data).toBe(largeBase64);
  });

  it("should handle old sessions with inline base64 (backward compat)", () => {
    const sessionId = "old-inline";
    const dir = join(testBaseDir, sessionId);
    mkdirSync(dir, { recursive: true });

    // Write raw JSONL with inline base64 (old format, no $ref)
    const entry = makeImageEntry(largeBase64);
    writeFileSync(join(dir, "session.jsonl"), `${JSON.stringify(entry)}\n`);

    const entries = readEntries(sessionId, { baseDir: testBaseDir });
    expect(entries).toHaveLength(1);
    const content = (entries[0] as any).message.content;
    expect(content[1].data).toBe(largeBase64);
  });

  it("should return placeholder for missing media file", () => {
    const sessionId = "missing-media";
    const dir = join(testBaseDir, sessionId);
    mkdirSync(dir, { recursive: true });

    // Write JSONL with a $ref but never create the media file
    const rawEntry = {
      type: "message",
      message: {
        role: "user",
        content: [
          { type: "image", $ref: "media/deadbeef.bin" },
        ],
      },
      timestamp: Date.now(),
    };
    writeFileSync(join(dir, "session.jsonl"), `${JSON.stringify(rawEntry)}\n`);

    const entries = readEntries(sessionId, { baseDir: testBaseDir });
    expect(entries).toHaveLength(1);
    const block = (entries[0] as any).message.content[0];
    expect(block.type).toBe("text");
    expect(block.text).toContain("unavailable");
  });

  it("should deduplicate same image data", async () => {
    const sessionId = "ext-dedup";

    await appendEntry(sessionId, makeImageEntry(largeBase64), { baseDir: testBaseDir });
    await appendEntry(sessionId, makeImageEntry(largeBase64), { baseDir: testBaseDir });

    expect(listMediaFiles(sessionId)).toHaveLength(1); // same hash = same file
  });

  it("should keep small images inline", async () => {
    const sessionId = "ext-small";

    await appendEntry(sessionId, makeImageEntry(smallBase64), { baseDir: testBaseDir });

    // Raw JSONL: the small image should still have data, not $ref
    const imageBlock = readRawEntry(sessionId).message.content[1];
    expect(imageBlock.data).toBe(smallBase64);
    expect(imageBlock.$ref).toBeUndefined();

    // No media dir should be created
    const mediaDir = resolveMediaDir(sessionId, { baseDir: testBaseDir });
    expect(existsSync(mediaDir)).toBe(false);
  });

  it("should not affect non-image entries", async () => {
    const sessionId = "ext-noimg";
    const entry: SessionEntry = {
      type: "message",
      message: { role: "assistant", content: "Just text response" } as any,
      timestamp: 1000,
    };

    await appendEntry(sessionId, entry, { baseDir: testBaseDir });

    expect(readRawJsonl(sessionId).trim()).toBe(JSON.stringify(entry));
  });

  it("should handle images inside nested tool_result content", async () => {
    const sessionId = "ext-tool-result";

    await appendEntry(sessionId, makeToolResultEntry(largeBase64), { baseDir: testBaseDir });

    // Raw JSONL should have $ref inside the tool_result
    const toolResult = readRawEntry(sessionId).message.content[0];
    expect(toolResult.content[1].$ref).toMatch(mediaRefPattern);

    // Round-trip should restore the inline data
    const entries = readEntries(sessionId, { baseDir: testBaseDir });
    const restored = (entries[0] as any).message.content[0].content[1];
    expect(restored.data).toBe(largeBase64);
    expect(restored.$ref).toBeUndefined();
  });

  it("should externalize via writeEntries (compaction path)", async () => {
    const sessionId = "ext-write-entries";

    await writeEntries(sessionId, [makeImageEntry(largeBase64)], { baseDir: testBaseDir });

    // Should be externalized
    expect(readRawEntry(sessionId).message.content[1].$ref).toMatch(mediaRefPattern);

    // Round-trip
    const entries = readEntries(sessionId, { baseDir: testBaseDir });
    expect((entries[0] as any).message.content[1].data).toBe(largeBase64);
  });
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
import { join } from "path";
|
||||
import { existsSync, mkdirSync, readFileSync } from "fs";
|
||||
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
||||
import { appendFile, writeFile } from "fs/promises";
|
||||
import { createHash } from "node:crypto";
|
||||
import type { SessionEntry } from "./types.js";
|
||||
import { DATA_DIR } from "@multica/utils";
|
||||
import { acquireSessionWriteLock } from "./session-write-lock.js";
|
||||
|
|
@ -9,6 +10,9 @@ export type SessionStorageOptions = {
|
|||
baseDir?: string | undefined;
|
||||
};
|
||||
|
||||
/** Base64 string length (in characters) that image data must exceed to be externalized; 43,000 chars ≈ 32 KB of decoded binary. */
|
||||
const MIN_EXTERNALIZE_B64_LENGTH = 43_000;
|
||||
|
||||
/**
 * Root directory under which session data lives.
 *
 * @param options - Optional storage settings; `baseDir` overrides the default.
 * @returns `options.baseDir` when it is set, otherwise `<DATA_DIR>/sessions`.
 */
export function resolveBaseDir(options?: SessionStorageOptions) {
  const override = options?.baseDir;
  if (override != null) return override;
  return join(DATA_DIR, "sessions");
}
|
||||
|
|
@ -21,6 +25,10 @@ export function resolveSessionPath(sessionId: string, options?: SessionStorageOp
|
|||
return join(resolveSessionDir(sessionId, options), "session.jsonl");
|
||||
}
|
||||
|
||||
/**
 * Path of the `media` sub-directory inside a session's directory, where
 * externalized image binaries are written.
 *
 * @param sessionId - Session whose media directory is wanted.
 * @param options - Optional storage settings forwarded to resolveSessionDir.
 */
export function resolveMediaDir(sessionId: string, options?: SessionStorageOptions) {
  const sessionDir = resolveSessionDir(sessionId, options);
  return join(sessionDir, "media");
}
|
||||
|
||||
export function ensureSessionDir(sessionId: string, options?: SessionStorageOptions) {
|
||||
const dir = resolveSessionDir(sessionId, options);
|
||||
// mkdirSync with recursive is idempotent (no-op if dir exists),
|
||||
|
|
@ -37,6 +45,200 @@ export function ensureSessionDir(sessionId: string, options?: SessionStorageOpti
|
|||
}
|
||||
}
|
||||
|
||||
// ─── Image Externalization ──────────────────────────────────────────────────
|
||||
|
||||
/**
 * Content-address a base64 payload.
 *
 * The payload is decoded first, so the digest is computed over the binary
 * bytes rather than the base64 text.
 *
 * @param base64Data - Base64-encoded data.
 * @returns The first 32 hex characters of the SHA-256 digest of the decoded bytes.
 */
function contentHash(base64Data: string): string {
  const hasher = createHash("sha256");
  hasher.update(Buffer.from(base64Data, "base64"));
  const fullDigest = hasher.digest("hex");
  return fullDigest.substring(0, 32);
}
|
||||
|
||||
/**
 * Create the session's media directory (and any missing parents).
 *
 * A first attempt that fails with ENOENT is retried once; any other error,
 * or a failure of the retry, propagates to the caller.
 * NOTE(review): the retry presumably covers a parent directory disappearing
 * mid-call — confirm the intended scenario.
 */
function ensureMediaDir(sessionId: string, options?: SessionStorageOptions): void {
  const mediaDir = resolveMediaDir(sessionId, options);
  const createDir = (): void => {
    mkdirSync(mediaDir, { recursive: true });
  };

  try {
    createDir();
  } catch (err) {
    const code = (err as NodeJS.ErrnoException).code;
    if (code !== "ENOENT") throw err;
    createDir();
  }
}
|
||||
|
||||
/**
 * Persist decoded image bytes as `<mediaDir>/<hash>.bin`.
 *
 * The media directory is created on demand. If a file with the same hash
 * already exists it is left untouched, so identical images share one file.
 *
 * @param sessionId - Session that owns the media directory.
 * @param hash - Content hash used as the file name (without extension).
 * @param base64Data - Base64 payload to decode and write.
 * @param options - Optional storage settings.
 */
function saveImageBinary(
  sessionId: string,
  hash: string,
  base64Data: string,
  options?: SessionStorageOptions,
): void {
  ensureMediaDir(sessionId, options);

  const target = join(resolveMediaDir(sessionId, options), `${hash}.bin`);
  if (!existsSync(target)) {
    writeFileSync(target, Buffer.from(base64Data, "base64"));
  }
}
|
||||
|
||||
/**
 * Swap the inline base64 payload of one image block for a reference to a
 * binary file under the session's media directory.
 *
 * Two block shapes are recognised, checked in this order:
 *  - Format A: `{ type: "image", data }` becomes `{ type: "image", $ref }`
 *  - Format B: `{ type: "image", source: { type: "base64", data } }` becomes
 *    `{ type: "image", source: { type: "$ref", path } }`
 *
 * Anything else — non-objects, non-image blocks, or payloads whose base64
 * length does not exceed MIN_EXTERNALIZE_B64_LENGTH — is returned as the very
 * same object.
 */
function externalizeBlock(
  block: any,
  sessionId: string,
  options?: SessionStorageOptions,
): any {
  const isImage = Boolean(block) && typeof block === "object" && block.type === "image";
  if (!isImage) return block;

  const isLargePayload = (value: unknown): value is string =>
    typeof value === "string" && value.length > MIN_EXTERNALIZE_B64_LENGTH;

  // Writes the payload to disk (deduplicated by hash) and yields its relative path.
  const storePayload = (payload: string): string => {
    const digest = contentHash(payload);
    saveImageBinary(sessionId, digest, payload, options);
    return `media/${digest}.bin`;
  };

  // Format A: payload sits directly on the block.
  if (isLargePayload(block.data)) {
    const ref = storePayload(block.data);
    const { data: _inlineData, ...withoutData } = block;
    return { ...withoutData, $ref: ref };
  }

  // Format B: payload sits under `source`.
  const source = block.source;
  if (source && typeof source === "object" && source.type === "base64" && isLargePayload(source.data)) {
    const ref = storePayload(source.data);
    return { ...block, source: { type: "$ref", path: ref } };
  }

  return block;
}
|
||||
|
||||
/** Only references of the shape written by externalizeBlock are ever dereferenced. */
const MEDIA_REF_PATTERN = /^media\/[a-f0-9]+\.bin$/;

/** Text block substituted for an image whose media file cannot be loaded. */
function unavailableImageBlock(): { type: "text"; text: string } {
  return { type: "text", text: "[Image unavailable: referenced media file not found]" };
}

/**
 * Load a referenced media file and return its contents as base64.
 *
 * Returns null when the reference is not a string, does not look like
 * `media/<hex>.bin` (which also rules out `..` traversal out of the session
 * directory), or when the file cannot be read.
 */
function loadMediaAsBase64(
  ref: unknown,
  sessionId: string,
  options?: SessionStorageOptions,
): string | null {
  if (typeof ref !== "string" || !MEDIA_REF_PATTERN.test(ref)) return null;

  const filePath = join(resolveSessionDir(sessionId, options), ref);
  try {
    return readFileSync(filePath).toString("base64");
  } catch {
    // Best effort: a missing or unreadable file degrades to a placeholder.
    return null;
  }
}

/**
 * Restore an externalized image reference back to inline base64 data.
 *
 * - Format A ref `{ type: "image", $ref }` becomes `{ type: "image", data }`.
 * - Format B ref `{ type: "image", source: { type: "$ref", path } }` becomes
 *   `{ type: "image", source: { type: "base64", data } }`.
 *
 * Blocks that are not image references are returned as the same object.
 * A reference that is malformed or whose file cannot be read is replaced by
 * a text placeholder instead of throwing, so one bad reference never causes
 * the whole entry to be discarded by the caller.
 */
function internalizeBlock(
  block: any,
  sessionId: string,
  options?: SessionStorageOptions,
): any {
  if (!block || typeof block !== "object" || block.type !== "image") return block;

  // Format A ref: { type: "image", $ref: "media/<hash>.bin" }
  if (typeof block.$ref === "string") {
    const data = loadMediaAsBase64(block.$ref, sessionId, options);
    if (data === null) return unavailableImageBlock();
    const { $ref: _removed, ...rest } = block;
    return { ...rest, data };
  }

  // Format B ref: { type: "image", source: { type: "$ref", path: "media/<hash>.bin" } }
  if (block.source && typeof block.source === "object" && block.source.type === "$ref") {
    const data = loadMediaAsBase64(block.source.path, sessionId, options);
    if (data === null) return unavailableImageBlock();
    return { ...block, source: { type: "base64", data } };
  }

  return block;
}
|
||||
|
||||
/**
 * Apply `transformBlock` to every content block, descending into the
 * `content` array of `tool_result` blocks.
 *
 * A `tool_result` block with an array `content` is never passed to
 * `transformBlock` itself; only its children are. A block counts as changed
 * when the transform returns a different object reference. Unchanged blocks
 * (including unchanged tool_result wrappers) are carried over by reference,
 * and the input array is never mutated.
 *
 * @returns The new block list plus a flag telling whether anything changed.
 */
function transformContentBlocks(
  content: any[],
  transformBlock: (block: any) => any,
): { content: any[]; changed: boolean } {
  let dirty = false;

  const mapped = Array.from(content, (item) => {
    const hasNestedContent =
      item !== null &&
      typeof item === "object" &&
      item.type === "tool_result" &&
      Array.isArray(item.content);

    if (hasNestedContent) {
      const nested = transformContentBlocks(item.content, transformBlock);
      if (!nested.changed) return item;
      dirty = true;
      return { ...item, content: nested.content };
    }

    const replacement = transformBlock(item);
    if (replacement !== item) dirty = true;
    return replacement;
  });

  return { content: mapped, changed: dirty };
}
|
||||
|
||||
/**
 * Move large inline base64 images of a session entry out to binary files
 * and replace them with file references.
 *
 * Only `message` entries whose `message.content` is an array are processed.
 * Every other entry — including a message entry that has no `message`
 * object — is returned as the same object. When no block changes, the
 * original entry is returned as well; otherwise a shallow copy with the
 * rewritten content is produced and the input is left unmodified.
 *
 * @param entry - Entry about to be written to the session file.
 * @param sessionId - Session whose media directory receives the binaries.
 * @param options - Optional storage settings.
 */
function externalizeImages(
  entry: SessionEntry,
  sessionId: string,
  options?: SessionStorageOptions,
): SessionEntry {
  if (entry.type !== "message") return entry;

  const message = entry.message as any;
  // Optional chaining: a message entry without a message object must not throw.
  const content = message?.content;
  if (!Array.isArray(content)) return entry;

  const result = transformContentBlocks(content, (block) =>
    externalizeBlock(block, sessionId, options),
  );
  if (!result.changed) return entry;

  return {
    ...entry,
    message: { ...message, content: result.content },
  } as SessionEntry;
}
|
||||
|
||||
/**
 * Resolve external image references in a session entry back to inline
 * base64 data.
 *
 * Only `message` entries whose `message.content` is an array are processed.
 * Every other entry — including a message entry read from disk without a
 * `message` object — is returned as the same object rather than throwing,
 * so a caller that treats exceptions as "malformed line" does not drop it.
 * When no block changes, the original entry is returned; otherwise a shallow
 * copy with the restored content is produced.
 *
 * @param entry - Entry as parsed from the session file.
 * @param sessionId - Session whose media directory holds the binaries.
 * @param options - Optional storage settings.
 */
function internalizeImages(
  entry: SessionEntry,
  sessionId: string,
  options?: SessionStorageOptions,
): SessionEntry {
  if (entry.type !== "message") return entry;

  const message = entry.message as any;
  // Optional chaining: parsed JSON may lack the message object entirely.
  const content = message?.content;
  if (!Array.isArray(content)) return entry;

  const result = transformContentBlocks(content, (block) =>
    internalizeBlock(block, sessionId, options),
  );
  if (!result.changed) return entry;

  return {
    ...entry,
    message: { ...message, content: result.content },
  } as SessionEntry;
}
|
||||
|
||||
// ─── Public API ─────────────────────────────────────────────────────────────
|
||||
|
||||
export function readEntries(sessionId: string, options?: SessionStorageOptions): SessionEntry[] {
|
||||
const filePath = resolveSessionPath(sessionId, options);
|
||||
if (!existsSync(filePath)) return [];
|
||||
|
|
@ -45,7 +247,8 @@ export function readEntries(sessionId: string, options?: SessionStorageOptions):
|
|||
const entries: SessionEntry[] = [];
|
||||
for (const line of lines) {
|
||||
try {
|
||||
entries.push(JSON.parse(line) as SessionEntry);
|
||||
const raw = JSON.parse(line) as SessionEntry;
|
||||
entries.push(internalizeImages(raw, sessionId, options));
|
||||
} catch {
|
||||
// Skip malformed lines
|
||||
}
|
||||
|
|
@ -62,7 +265,8 @@ export async function appendEntry(
|
|||
const filePath = resolveSessionPath(sessionId, options);
|
||||
const lock = await acquireSessionWriteLock({ sessionFile: filePath });
|
||||
try {
|
||||
await appendFile(filePath, `${JSON.stringify(entry)}\n`, "utf8");
|
||||
const externalized = externalizeImages(entry, sessionId, options);
|
||||
await appendFile(filePath, `${JSON.stringify(externalized)}\n`, "utf8");
|
||||
} finally {
|
||||
await lock.release();
|
||||
}
|
||||
|
|
@ -77,7 +281,9 @@ export async function writeEntries(
|
|||
const filePath = resolveSessionPath(sessionId, options);
|
||||
const lock = await acquireSessionWriteLock({ sessionFile: filePath });
|
||||
try {
|
||||
const content = entries.map((entry) => JSON.stringify(entry)).join("\n");
|
||||
const content = entries
|
||||
.map((entry) => JSON.stringify(externalizeImages(entry, sessionId, options)))
|
||||
.join("\n");
|
||||
await writeFile(filePath, content ? `${content}\n` : "", "utf8");
|
||||
} finally {
|
||||
await lock.release();
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ import { createDataTool } from "./tools/data/index.js";
|
|||
import { createSendFileTool } from "./tools/send-file.js";
|
||||
import type { SendFileCallback } from "./tools/send-file.js";
|
||||
import { filterTools } from "./tools/policy.js";
|
||||
import { wrapReadToolWithImageResize } from "./tools/image-resize.js";
|
||||
import { isMulticaError, isRetryableError } from "@multica/utils";
|
||||
import type { ExecApprovalCallback } from "./tools/exec-approval-types.js";
|
||||
|
||||
|
|
@ -106,9 +107,9 @@ export function createAllTools(options: CreateToolsOptions | string): AgentTool<
|
|||
const opts: CreateToolsOptions = typeof options === "string" ? { cwd: options } : options;
|
||||
const { cwd, profileDir, isSubagent, sessionId } = opts;
|
||||
|
||||
const baseTools = createCodingTools(cwd).filter(
|
||||
(tool) => tool.name !== "bash",
|
||||
) as AgentTool<any>[];
|
||||
const baseTools = createCodingTools(cwd)
|
||||
.filter((tool) => tool.name !== "bash")
|
||||
.map((tool) => tool.name === "read" ? wrapReadToolWithImageResize(tool) : tool) as AgentTool<any>[];
|
||||
|
||||
const execTool = createExecTool(cwd, opts.onExecApprovalNeeded);
|
||||
const processTool = createProcessTool(cwd);
|
||||
|
|
|
|||
56
packages/core/src/agent/tools/image-resize.test.ts
Normal file
56
packages/core/src/agent/tools/image-resize.test.ts
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import { describe, it, expect } from "vitest";
|
||||
import { wrapReadToolWithImageResize } from "./image-resize.js";
|
||||
|
||||
describe("image-resize", () => {
  /** Minimal stand-in for the read tool, built around the given execute(). */
  function stubReadTool(execute: () => Promise<unknown>) {
    return {
      name: "read",
      description: "test",
      parameters: {} as any,
      execute,
    };
  }

  /** Wrap a stub tool and invoke it once with empty arguments. */
  function runWrapped(execute: () => Promise<unknown>): Promise<any> {
    const wrapped = wrapReadToolWithImageResize(stubReadTool(execute) as any);
    return wrapped.execute({} as any) as any;
  }

  it("should pass through non-image content unchanged", async () => {
    const content = [{ type: "text", text: "Hello world" }];

    const result = await runWrapped(async () => ({ content }));

    expect(result.content).toHaveLength(1);
    expect(result.content[0].text).toBe("Hello world");
  });

  it("should pass through small images unchanged", async () => {
    // 100 bytes is far below the wrapper's resize threshold.
    const smallBase64 = Buffer.alloc(100, 0x41).toString("base64");
    const content = [{ type: "image", data: smallBase64 }];

    const result = await runWrapped(async () => ({ content }));

    expect(result.content[0].data).toBe(smallBase64);
  });

  it("should pass through results without content arrays", async () => {
    const result = await runWrapped(async () => ({ text: "plain" }));

    expect(result.text).toBe("plain");
  });

  it("should handle execution errors gracefully", async () => {
    const failing = async () => {
      throw new Error("file not found");
    };

    await expect(runWrapped(failing)).rejects.toThrow("file not found");
  });
});
|
||||
211
packages/core/src/agent/tools/image-resize.ts
Normal file
211
packages/core/src/agent/tools/image-resize.ts
Normal file
|
|
@ -0,0 +1,211 @@
|
|||
/**
|
||||
* Image resize wrapper for the read tool.
|
||||
*
|
||||
* Wraps the read tool from pi-coding-agent to automatically downscale
|
||||
* oversized images returned in tool results. Uses macOS `sips` for resize
|
||||
* (no extra dependencies required).
|
||||
*/
|
||||
|
||||
import type { AgentTool, AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
import { execFile } from "node:child_process";
|
||||
import { writeFile, readFile, mkdtemp, rm } from "node:fs/promises";
|
||||
import { join } from "node:path";
|
||||
import { tmpdir } from "node:os";
|
||||
|
||||
/** Max decoded image binary size (1MB) */
|
||||
const MAX_IMAGE_BYTES = 1 * 1024 * 1024;
|
||||
|
||||
/** Max image dimension in pixels per side */
|
||||
const MAX_IMAGE_DIMENSION_PX = 2000;
|
||||
|
||||
/** JPEG quality for resized output */
|
||||
const JPEG_QUALITY = 80;
|
||||
|
||||
type ContentBlock = AgentToolResult<unknown>["content"][number];
|
||||
|
||||
/**
 * Type guard for Format A image blocks: an object with `type === "image"`
 * and a string `data` field.
 */
function isImageBlock(block: unknown): block is { type: "image"; data: string; [key: string]: unknown } {
  if (!block || typeof block !== "object") return false;

  const candidate = block as any;
  return candidate.type === "image" && typeof candidate.data === "string";
}
|
||||
|
||||
/**
 * Invoke `/usr/bin/sips` with the given arguments.
 *
 * The process is limited to a 20 second timeout and 1 MiB of buffered
 * output. The binary lives at a macOS-specific path, so the call rejects on
 * systems where it does not exist.
 *
 * @returns Promise resolving to the command's stdout text, or rejecting with
 *          the error reported by `execFile`.
 */
function runSips(args: string[]): Promise<string> {
  const execOptions = { timeout: 20_000, maxBuffer: 1024 * 1024 };

  return new Promise((resolve, reject) => {
    execFile("/usr/bin/sips", args, execOptions, (err, stdout) => {
      if (err) {
        reject(err);
        return;
      }
      resolve(stdout);
    });
  });
}
|
||||
|
||||
/**
 * Measure an image's pixel dimensions with sips.
 *
 * The bytes are written to `<tmpDir>/in.img` and sips is asked for its
 * `pixelWidth` / `pixelHeight` properties.
 *
 * @param buffer - Raw image bytes.
 * @param tmpDir - Existing scratch directory for the temporary input file.
 * @returns The parsed dimensions, or null when sips fails or its output
 *          lacks either value. A failure to write the input file rejects.
 */
async function getImageDimensions(
  buffer: Buffer,
  tmpDir: string,
): Promise<{ width: number; height: number } | null> {
  const inputPath = join(tmpDir, "in.img");
  await writeFile(inputPath, buffer);

  try {
    const report = await runSips(["-g", "pixelWidth", "-g", "pixelHeight", inputPath]);
    const widthMatch = /pixelWidth:\s*(\d+)/.exec(report);
    const heightMatch = /pixelHeight:\s*(\d+)/.exec(report);
    if (!widthMatch?.[1] || !heightMatch?.[1]) return null;

    return {
      width: parseInt(widthMatch[1], 10),
      height: parseInt(heightMatch[1], 10),
    };
  } catch {
    // sips is unavailable or could not read the image
    return null;
  }
}
|
||||
|
||||
/**
 * Convert an image to JPEG with sips, passing `maxSide` to its `-Z` option.
 *
 * The bytes are written to `<tmpDir>/in.img`, sips writes
 * `<tmpDir>/out.jpg`, and that file's contents are returned.
 *
 * @param buffer - Raw image bytes.
 * @param maxSide - Pixel limit handed to `sips -Z`.
 * @param quality - JPEG quality passed as sips `formatOptions`.
 * @param tmpDir - Existing scratch directory for the temporary files.
 * @returns The bytes of the JPEG that sips produced; rejects if sips fails.
 */
async function resizeWithSips(
  buffer: Buffer,
  maxSide: number,
  quality: number,
  tmpDir: string,
): Promise<Buffer> {
  const inputPath = join(tmpDir, "in.img");
  const outputPath = join(tmpDir, "out.jpg");
  await writeFile(inputPath, buffer);

  const sipsArgs = [
    "-Z", String(maxSide),
    "-s", "format", "jpeg",
    "-s", "formatOptions", String(quality),
    inputPath,
    "--out", outputPath,
  ];
  await runSips(sipsArgs);

  return readFile(outputPath);
}
|
||||
|
||||
/**
 * Downscale an image to JPEG when it exceeds the byte or dimension limits.
 *
 * Resizing relies on sips, so on any platform other than macOS the input is
 * returned untouched. On macOS:
 *  1. If the dimensions can be read and both the byte size and the
 *     dimensions are within limits, the input is returned untouched.
 *  2. Otherwise the image is converted to JPEG, bounded by
 *     MAX_IMAGE_DIMENSION_PX (or by its own longest side when smaller).
 *  3. While the result is still above MAX_IMAGE_BYTES, progressively smaller
 *     sides are tried. Sides that are not smaller than the first attempt are
 *     skipped because they cannot shrink the output.
 *  4. If no attempt fits, the smallest attempt made is returned, the closest
 *     available to the limit.
 *
 * Any sips failure falls back to the original data (best effort).
 *
 * @param base64Data - Base64-encoded image.
 * @returns The (possibly re-encoded) base64 data, `mimeType: "image/jpeg"`
 *          when re-encoded, and whether a resize happened.
 */
async function maybeResizeImage(base64Data: string): Promise<{ base64: string; mimeType?: string; resized: boolean }> {
  const passThrough = (): { base64: string; mimeType?: string; resized: boolean } => ({
    base64: base64Data,
    resized: false,
  });

  // sips only exists on macOS (future: add sharp support elsewhere).
  if (process.platform !== "darwin") return passThrough();

  const buffer = Buffer.from(base64Data, "base64");
  const overSize = buffer.byteLength > MAX_IMAGE_BYTES;
  const fallbackSides = [1600, 1200, 800];

  const tmpDir = await mkdtemp(join(tmpdir(), "multica-img-"));
  try {
    const dims = await getImageDimensions(buffer, tmpDir);

    // Dimensions known and everything within limits: nothing to do.
    if (dims && !overSize && dims.width <= MAX_IMAGE_DIMENSION_PX && dims.height <= MAX_IMAGE_DIMENSION_PX) {
      return passThrough();
    }

    const longestSide = dims ? Math.max(dims.width, dims.height) : MAX_IMAGE_DIMENSION_PX;
    const targetSide = Math.min(MAX_IMAGE_DIMENSION_PX, longestSide);
    let best = await resizeWithSips(buffer, targetSide, JPEG_QUALITY, tmpDir);

    // Still too large: step down until it fits or the options run out.
    for (const side of fallbackSides) {
      if (best.byteLength <= MAX_IMAGE_BYTES) break;
      if (side >= targetSide) continue;
      best = await resizeWithSips(buffer, side, JPEG_QUALITY, tmpDir);
    }

    return { base64: best.toString("base64"), mimeType: "image/jpeg", resized: true };
  } catch {
    // sips failed, pass through original
    return passThrough();
  } finally {
    // Cleanup is best effort; a leftover temp dir must not fail the read.
    await rm(tmpDir, { recursive: true, force: true }).catch(() => {});
  }
}
|
||||
|
||||
/**
 * Process tool result content blocks, resizing oversized images.
 *
 * Non-image blocks and images whose decoded size is within MAX_IMAGE_BYTES
 * are passed through by reference. An oversized image that gets resized is
 * replaced by a copy carrying the new data and the MIME type reported by the
 * resizer, because the resize re-encodes the image and the original
 * `mimeType` would no longer describe the payload. If resizing is skipped or
 * throws, the original block is kept.
 *
 * @param content - Content blocks of a tool result.
 * @returns A new array with the same blocks in the same order.
 */
async function processResultContent(content: ContentBlock[]): Promise<ContentBlock[]> {
  const result: ContentBlock[] = [];

  for (const block of content) {
    if (!isImageBlock(block)) {
      result.push(block);
      continue;
    }

    // Skip small images entirely
    const decodedBytes = Buffer.from(block.data, "base64").byteLength;
    if (decodedBytes <= MAX_IMAGE_BYTES) {
      result.push(block);
      continue;
    }

    let replacement: ContentBlock = block;
    try {
      const outcome = await maybeResizeImage(block.data);
      if (outcome.resized) {
        replacement = {
          ...block,
          data: outcome.base64,
          // Keep the declared media type in sync with the re-encoded bytes.
          ...(outcome.mimeType ? { mimeType: outcome.mimeType } : {}),
        } as ContentBlock;
      }
    } catch {
      // Best effort: on any failure the original image is returned as-is.
    }
    result.push(replacement);
  }

  return result;
}
|
||||
|
||||
/**
 * Decorate the read tool so that oversized images in its results are
 * automatically downscaled.
 *
 * The returned tool is a shallow copy of `tool` with `execute` replaced.
 * Results without a `content` array, or without any image block whose
 * decoded size exceeds MAX_IMAGE_BYTES, are returned exactly as the wrapped
 * tool produced them. Errors thrown by the wrapped tool propagate unchanged.
 */
export function wrapReadToolWithImageResize(
  tool: AgentTool<any, any>,
): AgentTool<any, any> {
  const innerExecute = tool.execute;

  const isOversizedImage = (candidate: unknown): boolean =>
    isImageBlock(candidate) &&
    Buffer.from((candidate as any).data, "base64").byteLength > MAX_IMAGE_BYTES;

  const execute = async (...args: Parameters<typeof innerExecute>) => {
    const result = await innerExecute(...args);

    // Only results carrying a content array can hold image blocks.
    const loose = result as any;
    const blocks = loose?.content;
    if (!blocks || !Array.isArray(blocks)) return result;

    // Avoid the resize pipeline when no image is large enough to need it.
    if (!blocks.some((entry: unknown) => isOversizedImage(entry))) return result;

    const processed = await processResultContent(blocks);
    return { ...loose, content: processed } as typeof result;
  };

  return { ...tool, execute };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue