feat(agent): add web_fetch and web_search tools (#20)

* chore(deps): add web tools dependencies

Add undici, @mozilla/readability, linkedom, and turndown for
web fetching and HTML content extraction capabilities.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* feat(agent): add web_fetch and web_search tools

Port network tools from moltbot with the following features:

web_fetch:
- Fetch and extract readable content from URLs
- Dual extraction mode: readability (smart) or turndown (full page)
- SSRF protection (blocks private IPs, localhost)
- Response caching with TTL
- Redirect handling

web_search:
- Brave Search API for traditional results
- Perplexity API for AI-synthesized answers
- Region/language settings
- Freshness filtering (Brave only)
- Auto-detects provider from available API keys

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

* feat(agent): register web_fetch and web_search tools

Add web tools to the agent's tool registry.

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jiayuan 2026-01-30 05:09:16 +08:00 committed by GitHub
parent 028c3a2911
commit 9b3ffd1e90
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
10 changed files with 1568 additions and 1 deletions

View file

@ -23,6 +23,7 @@
"packageManager": "pnpm@10.16.1",
"devDependencies": {
"@types/node": "^25.0.10",
"@types/turndown": "^5.0.6",
"@types/uuid": "^11.0.0",
"tsx": "^4.21.0",
"turbo": "^2.3.4",
@ -32,6 +33,7 @@
"@mariozechner/pi-agent-core": "^0.50.3",
"@mariozechner/pi-ai": "^0.50.3",
"@mariozechner/pi-coding-agent": "^0.50.3",
"@mozilla/readability": "^0.6.0",
"@nestjs/common": "^11.1.12",
"@nestjs/core": "^11.1.12",
"@nestjs/platform-express": "^11.1.12",
@ -40,6 +42,7 @@
"@nestjs/websockets": "^11.1.12",
"@sinclair/typebox": "^0.34.41",
"fast-glob": "^3.3.3",
"linkedom": "^0.18.12",
"nestjs-pino": "^4.5.0",
"pino": "^10.3.0",
"pino-http": "^11.0.0",
@ -48,6 +51,8 @@
"rxjs": "^7.8.2",
"socket.io": "^4.8.3",
"socket.io-client": "^4.8.3",
"turndown": "^7.2.2",
"undici": "^7.19.2",
"uuid": "^13.0.0"
}
}

152
pnpm-lock.yaml generated
View file

@ -17,6 +17,9 @@ importers:
'@mariozechner/pi-coding-agent':
specifier: ^0.50.3
version: 0.50.3(@modelcontextprotocol/sdk@1.25.3(hono@4.11.7)(zod@4.3.6))(ws@8.18.3)(zod@4.3.6)
'@mozilla/readability':
specifier: ^0.6.0
version: 0.6.0
'@nestjs/common':
specifier: ^11.1.12
version: 11.1.12(reflect-metadata@0.2.2)(rxjs@7.8.2)
@ -41,6 +44,9 @@ importers:
fast-glob:
specifier: ^3.3.3
version: 3.3.3
linkedom:
specifier: ^0.18.12
version: 0.18.12
nestjs-pino:
specifier: ^4.5.0
version: 4.5.0(@nestjs/common@11.1.12(reflect-metadata@0.2.2)(rxjs@7.8.2))(pino-http@11.0.0)(pino@10.3.0)(rxjs@7.8.2)
@ -65,6 +71,12 @@ importers:
socket.io-client:
specifier: ^4.8.3
version: 4.8.3
turndown:
specifier: ^7.2.2
version: 7.2.2
undici:
specifier: ^7.19.2
version: 7.19.2
uuid:
specifier: ^13.0.0
version: 13.0.0
@ -72,6 +84,9 @@ importers:
'@types/node':
specifier: ^25.0.10
version: 25.0.10
'@types/turndown':
specifier: ^5.0.6
version: 5.0.6
'@types/uuid':
specifier: ^11.0.0
version: 11.0.0
@ -1043,6 +1058,9 @@ packages:
'@mistralai/mistralai@1.10.0':
resolution: {integrity: sha512-tdIgWs4Le8vpvPiUEWne6tK0qbVc+jMenujnvTqOjogrJUsCSQhus0tHTU1avDDh5//Rq2dFgP9mWRAdIEoBqg==}
'@mixmark-io/domino@2.2.0':
resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==}
'@modelcontextprotocol/sdk@1.25.3':
resolution: {integrity: sha512-vsAMBMERybvYgKbg/l4L1rhS7VXV1c0CtyJg72vwxONVX0l4ZfKVAnZEWTQixJGTzKnELjQ59e4NbdFDALRiAQ==}
engines: {node: '>=18'}
@ -1053,6 +1071,10 @@ packages:
'@cfworker/json-schema':
optional: true
'@mozilla/readability@0.6.0':
resolution: {integrity: sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==}
engines: {node: '>=14.0.0'}
'@mswjs/interceptors@0.40.0':
resolution: {integrity: sha512-EFd6cVbHsgLa6wa4RljGj6Wk75qoHxUSyc5asLyyPSyuhIcdS2Q3Phw6ImS1q+CkALthJRShiYfKANcQMuMqsQ==}
engines: {node: '>=18'}
@ -1585,6 +1607,9 @@ packages:
'@types/statuses@2.0.6':
resolution: {integrity: sha512-xMAgYwceFhRA2zY+XbEA7mxYbA093wdiW8Vu6gZPGWy9cmOyU9XesH1tNcEWsKFd5Vzrqx5T3D38PWx1FIIXkA==}
'@types/turndown@5.0.6':
resolution: {integrity: sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==}
'@types/uuid@11.0.0':
resolution: {integrity: sha512-HVyk8nj2m+jcFRNazzqyVKiZezyhDKrGUA3jlEcg/nZ6Ms+qHwocba1Y/AaVaznJTAM9xpdFSh+ptbNrhOGvZA==}
deprecated: This is a stub types definition. uuid provides its own type definitions, so you do not need this installed.
@ -1903,6 +1928,9 @@ packages:
resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==}
engines: {node: '>=18'}
boolbase@1.0.0:
resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==}
bowser@2.13.1:
resolution: {integrity: sha512-OHawaAbjwx6rqICCKgSG0SAnT05bzd7ppyKLVUITZpANBaaMFBAsaNkto3LoQ31tyFP5kNujE8Cdx85G9VzOkw==}
@ -2076,11 +2104,21 @@ packages:
resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
engines: {node: '>= 8'}
css-select@5.2.2:
resolution: {integrity: sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==}
css-what@6.2.2:
resolution: {integrity: sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==}
engines: {node: '>= 6'}
cssesc@3.0.0:
resolution: {integrity: sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==}
engines: {node: '>=4'}
hasBin: true
cssom@0.5.0:
resolution: {integrity: sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==}
csstype@3.2.3:
resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==}
@ -2182,6 +2220,19 @@ packages:
resolution: {integrity: sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==}
engines: {node: '>=0.10.0'}
dom-serializer@2.0.0:
resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==}
domelementtype@2.3.0:
resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==}
domhandler@5.0.3:
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
engines: {node: '>= 4'}
domutils@3.2.2:
resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==}
dotenv@17.2.3:
resolution: {integrity: sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==}
engines: {node: '>=12'}
@ -2237,6 +2288,14 @@ packages:
resolution: {integrity: sha512-LgQMM4WXU3QI+SYgEc2liRgznaD5ojbmY3sb8LxyguVkIg5FxdpTkvk72te2R38/TGKxH634oLxXRGY6d7AP+Q==}
engines: {node: '>=10.13.0'}
entities@4.5.0:
resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==}
engines: {node: '>=0.12'}
entities@7.0.1:
resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==}
engines: {node: '>=0.12'}
env-paths@2.2.1:
resolution: {integrity: sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==}
engines: {node: '>=6'}
@ -2726,6 +2785,12 @@ packages:
resolution: {integrity: sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==}
engines: {node: '>=16.9.0'}
html-escaper@3.0.3:
resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==}
htmlparser2@10.1.0:
resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==}
http-errors@2.0.1:
resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==}
engines: {node: '>= 0.8'}
@ -3140,6 +3205,15 @@ packages:
lines-and-columns@1.2.4:
resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==}
linkedom@0.18.12:
resolution: {integrity: sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q==}
engines: {node: '>=16'}
peerDependencies:
canvas: '>= 2'
peerDependenciesMeta:
canvas:
optional: true
load-esm@1.0.3:
resolution: {integrity: sha512-v5xlu8eHD1+6r8EHTg6hfmO97LN8ugKtiXcy5e6oN72iD2r6u0RPfLl6fxM+7Wnh2ZRq15o0russMst44WauPA==}
engines: {node: '>=13.2.0'}
@ -3353,6 +3427,9 @@ packages:
resolution: {integrity: sha512-9qny7Z9DsQU8Ou39ERsPU4OZQlSTP47ShQzuKZ6PRXpYLtIFgl/DEBYEXKlvcEa+9tHVcK8CF81Y2V72qaZhWA==}
engines: {node: '>=18'}
nth-check@2.1.1:
resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==}
object-assign@4.1.1:
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
engines: {node: '>=0.10.0'}
@ -4119,6 +4196,9 @@ packages:
resolution: {integrity: sha512-hYbxnLEdvJF+DLALS+Ia+PbfNtn0sDP0hH2u9AFoskSUDmcVHSrtwHpzdX94MrRJKo9D9tYxY3MyP20gnlrWyA==}
hasBin: true
turndown@7.2.2:
resolution: {integrity: sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==}
tw-animate-css@1.4.0:
resolution: {integrity: sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==}
@ -4169,6 +4249,9 @@ packages:
engines: {node: '>=14.17'}
hasBin: true
uhyphen@0.2.0:
resolution: {integrity: sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==}
uid@2.0.2:
resolution: {integrity: sha512-u3xV3X7uzvi5b1MncmZo3i2Aw222Zk1keqLA1YkHldREkAhAqi65wuPfe7lHx8H/Wzy+8CE7S7uS3jekIM5s8g==}
engines: {node: '>=8'}
@ -5561,6 +5644,8 @@ snapshots:
zod: 3.25.76
zod-to-json-schema: 3.25.1(zod@3.25.76)
'@mixmark-io/domino@2.2.0': {}
'@modelcontextprotocol/sdk@1.25.3(hono@4.11.7)(zod@3.25.76)':
dependencies:
'@hono/node-server': 1.19.9(hono@4.11.7)
@ -5606,6 +5691,8 @@ snapshots:
- supports-color
optional: true
'@mozilla/readability@0.6.0': {}
'@mswjs/interceptors@0.40.0':
dependencies:
'@open-draft/deferred-promise': 2.2.0
@ -6204,6 +6291,8 @@ snapshots:
'@types/statuses@2.0.6': {}
'@types/turndown@5.0.6': {}
'@types/uuid@11.0.0':
dependencies:
uuid: 13.0.0
@ -6531,6 +6620,8 @@ snapshots:
transitivePeerDependencies:
- supports-color
boolbase@1.0.0: {}
bowser@2.13.1: {}
brace-expansion@1.1.12:
@ -6695,8 +6786,20 @@ snapshots:
shebang-command: 2.0.0
which: 2.0.2
css-select@5.2.2:
dependencies:
boolbase: 1.0.0
css-what: 6.2.2
domhandler: 5.0.3
domutils: 3.2.2
nth-check: 2.1.1
css-what@6.2.2: {}
cssesc@3.0.0: {}
cssom@0.5.0: {}
csstype@3.2.3: {}
damerau-levenshtein@1.0.8: {}
@ -6776,6 +6879,24 @@ snapshots:
dependencies:
esutils: 2.0.3
dom-serializer@2.0.0:
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
entities: 4.5.0
domelementtype@2.3.0: {}
domhandler@5.0.3:
dependencies:
domelementtype: 2.3.0
domutils@3.2.2:
dependencies:
dom-serializer: 2.0.0
domelementtype: 2.3.0
domhandler: 5.0.3
dotenv@17.2.3: {}
dunder-proto@1.0.1:
@ -6848,6 +6969,10 @@ snapshots:
graceful-fs: 4.2.11
tapable: 2.3.0
entities@4.5.0: {}
entities@7.0.1: {}
env-paths@2.2.1: {}
error-ex@1.3.4:
@ -7577,6 +7702,15 @@ snapshots:
hono@4.11.7: {}
html-escaper@3.0.3: {}
htmlparser2@10.1.0:
dependencies:
domelementtype: 2.3.0
domhandler: 5.0.3
domutils: 3.2.2
entities: 7.0.1
http-errors@2.0.1:
dependencies:
depd: 2.0.0
@ -7942,6 +8076,14 @@ snapshots:
lines-and-columns@1.2.4: {}
linkedom@0.18.12:
dependencies:
css-select: 5.2.2
cssom: 0.5.0
html-escaper: 3.0.3
htmlparser2: 10.1.0
uhyphen: 0.2.0
load-esm@1.0.3: {}
locate-path@6.0.0:
@ -8135,6 +8277,10 @@ snapshots:
path-key: 4.0.0
unicorn-magic: 0.3.0
nth-check@2.1.1:
dependencies:
boolbase: 1.0.0
object-assign@4.1.1: {}
object-hash@3.0.0: {}
@ -9075,6 +9221,10 @@ snapshots:
turbo-windows-64: 2.8.0
turbo-windows-arm64: 2.8.0
turndown@7.2.2:
dependencies:
'@mixmark-io/domino': 2.2.0
tw-animate-css@1.4.0: {}
type-check@0.4.0:
@ -9144,6 +9294,8 @@ snapshots:
typescript@5.9.3: {}
uhyphen@0.2.0: {}
uid@2.0.2:
dependencies:
'@lukeed/csprng': 1.1.0

View file

@ -5,6 +5,7 @@ import type { AgentTool } from "@mariozechner/pi-agent-core";
import { createExecTool } from "./tools/exec.js";
import { createProcessTool } from "./tools/process.js";
import { createGlobTool } from "./tools/glob.js";
import { createWebFetchTool, createWebSearchTool } from "./tools/web/index.js";
export function resolveModel(options: AgentOptions) {
if (options.provider && options.model) {
@ -23,5 +24,14 @@ export function resolveTools(options: AgentOptions): AgentTool<any>[] {
const execTool = createExecTool(cwd);
const processTool = createProcessTool(cwd);
const globTool = createGlobTool(cwd);
return [...baseTools, execTool as AgentTool<any>, processTool as AgentTool<any>, globTool as AgentTool<any>];
const webFetchTool = createWebFetchTool();
const webSearchTool = createWebSearchTool();
return [
...baseTools,
execTool as AgentTool<any>,
processTool as AgentTool<any>,
globTool as AgentTool<any>,
webFetchTool as AgentTool<any>,
webSearchTool as AgentTool<any>,
];
}

View file

@ -0,0 +1,87 @@
/**
 * A cached value together with its bookkeeping timestamps.
 * Both timestamps are epoch milliseconds as produced by `Date.now()`.
 */
export type CacheEntry<T> = {
// The cached payload.
value: T;
// Instant after which the entry is considered stale (compared against `Date.now()` on read).
expiresAt: number;
// Instant at which the entry was stored.
insertedAt: number;
};
// Default request timeout, in seconds. Presumably passed as the fallback to
// resolveTimeoutSeconds by the web tools — confirm at the call sites.
export const DEFAULT_TIMEOUT_SECONDS = 30;
// Default cache lifetime, in minutes. Presumably passed as the fallback to
// resolveCacheTtlMs by the web tools — confirm at the call sites.
export const DEFAULT_CACHE_TTL_MINUTES = 15;
// Entry count at which writeCache drops the first-inserted key before adding another.
const DEFAULT_CACHE_MAX_ENTRIES = 100;
/**
 * Resolve a timeout in whole seconds.
 *
 * @param value - Candidate timeout; used only when it is a finite number.
 * @param fallback - Value used when `value` is not a finite number.
 * @returns The chosen value rounded down, never less than 1.
 */
export function resolveTimeoutSeconds(value: unknown, fallback: number): number {
  let seconds = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    seconds = value;
  }
  const whole = Math.floor(seconds);
  // Clamp to a one-second minimum.
  return whole < 1 ? 1 : whole;
}
/**
 * Resolve a cache TTL expressed in minutes into milliseconds.
 *
 * @param value - Candidate TTL in minutes; used only when it is a finite number
 *   (negative values are clamped to 0).
 * @param fallbackMinutes - Minutes used when `value` is not a finite number.
 * @returns The TTL in milliseconds, rounded to the nearest integer.
 */
export function resolveCacheTtlMs(value: unknown, fallbackMinutes: number): number {
  const msPerMinute = 60_000;
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return Math.round(fallbackMinutes * msPerMinute);
  }
  const clamped = Math.max(0, value);
  return Math.round(clamped * msPerMinute);
}
/**
 * Canonicalize a cache key: surrounding whitespace is removed and the
 * remainder is lower-cased.
 */
export function normalizeCacheKey(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
/**
 * Look up a cache entry, discarding it if it has expired.
 *
 * @param cache - Backing map of entries.
 * @param key - Key to look up (used as-is).
 * @returns `{ value, cached: true }` for a live entry; `null` when the key is
 *   absent or the entry has expired (an expired entry is deleted from `cache`).
 */
export function readCache<T>(
  cache: Map<string, CacheEntry<T>>,
  key: string,
): { value: T; cached: boolean } | null {
  const hit = cache.get(key);
  if (hit) {
    const expired = Date.now() > hit.expiresAt;
    if (!expired) {
      return { value: hit.value, cached: true };
    }
    // Stale entries are removed eagerly on read.
    cache.delete(key);
  }
  return null;
}
/**
 * Store a value in the cache with a time-to-live.
 *
 * When the cache is full, the entry at the front of the Map's insertion order
 * is evicted to make room. Writing to a key that is already present replaces
 * that entry in place of evicting another one, and moves the key to the back
 * of the insertion order so freshly written data is evicted last.
 *
 * @param cache - Backing map of entries; mutated in place.
 * @param key - Key to store under (used as-is).
 * @param value - Payload to cache.
 * @param ttlMs - Lifetime in milliseconds; values `<= 0` disable caching and
 *   make this call a no-op.
 */
export function writeCache<T>(
  cache: Map<string, CacheEntry<T>>,
  key: string,
  value: T,
  ttlMs: number,
): void {
  if (ttlMs <= 0) return;
  // Drop any previous entry for this key first: Map.set on an existing key
  // keeps its old position, and counting it toward the size limit would evict
  // an unrelated entry even though the total would not grow.
  cache.delete(key);
  if (cache.size >= DEFAULT_CACHE_MAX_ENTRIES) {
    const oldest = cache.keys().next();
    if (!oldest.done) cache.delete(oldest.value);
  }
  // Single clock read so expiresAt - insertedAt is exactly ttlMs.
  const now = Date.now();
  cache.set(key, {
    value,
    expiresAt: now + ttlMs,
    insertedAt: now,
  });
}
/**
 * Derive an AbortSignal that aborts when either the upstream signal aborts or
 * the timeout elapses, whichever happens first.
 *
 * @param signal - Optional upstream signal to follow.
 * @param timeoutMs - Timeout in milliseconds. Values `<= 0` disable the timeout:
 *   the upstream signal is returned unchanged, or a fresh never-aborting signal
 *   when there is none.
 * @returns A signal owned by this helper (or `signal` itself when the timeout
 *   is disabled).
 */
export function withTimeout(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal {
  if (timeoutMs <= 0) return signal ?? new AbortController().signal;
  const controller = new AbortController();
  // An already-aborted upstream signal will never fire "abort" again, so it
  // must be honoured here; no timer is started in that case.
  if (signal?.aborted) {
    controller.abort();
    return controller.signal;
  }
  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const onUpstreamAbort = (): void => {
    clearTimeout(timer);
    controller.abort();
  };
  signal?.addEventListener("abort", onUpstreamAbort, { once: true });
  controller.signal.addEventListener(
    "abort",
    () => {
      clearTimeout(timer);
      // Detach from the upstream signal so a long-lived upstream does not keep
      // this controller reachable after the timeout has fired.
      signal?.removeEventListener("abort", onUpstreamAbort);
    },
    { once: true },
  );
  return controller.signal;
}
/**
 * Read a response body as text on a best-effort basis.
 *
 * @param res - Response whose body should be read.
 * @returns The body text, or an empty string if reading it throws.
 */
export async function readResponseText(res: Response): Promise<string> {
  let body = "";
  try {
    body = await res.text();
  } catch {
    // Deliberate best effort: a failed body read yields "".
  }
  return body;
}

View file

@ -0,0 +1,208 @@
import TurndownService from "turndown";
// Output format requested from the extractors: markdown or plain text.
export type ExtractMode = "markdown" | "text";
// Which extraction strategy produced (or should produce) the result.
export type ExtractorType = "readability" | "turndown";
// Extracted page content.
export type ExtractResult = {
// The extracted body, in the requested output format.
text: string;
// Page title; the result builders in this file omit the key when no title was found.
title?: string;
};
// An ExtractResult tagged with the extractor that produced it.
export type ExtractResultWithExtractor = ExtractResult & {
extractor: ExtractorType;
};
/**
 * Decode the small set of HTML entities this module cares about:
 * `&nbsp;`, `&amp;`, `&quot;`, `&lt;`, `&gt;` (case-insensitive) plus decimal
 * (`&#NN;`) and hexadecimal (`&#xHH;`) numeric references.
 *
 * Decoding happens in a single pass, so already-decoded output is never
 * decoded again (`&amp;lt;` yields the literal text `&lt;`, not `<`).
 * Numeric references are decoded as full Unicode code points, so characters
 * outside the Basic Multilingual Plane (e.g. emoji) survive intact. NUL,
 * surrogate, and out-of-range references become U+FFFD. Unrecognized entities
 * are left untouched.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The text with recognized entities replaced.
 */
function decodeEntities(value: string): string {
  const named: Record<string, string> = {
    nbsp: " ",
    amp: "&",
    quot: '"',
    lt: "<",
    gt: ">",
  };
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(nbsp|amp|quot|lt|gt));/gi,
    (
      match: string,
      hex: string | undefined,
      dec: string | undefined,
      name: string | undefined,
    ): string => {
      if (name !== undefined) return named[name.toLowerCase()] ?? match;
      const codePoint =
        hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec ?? "", 10);
      const isSurrogate = codePoint >= 0xd800 && codePoint <= 0xdfff;
      // String.fromCodePoint throws RangeError above U+10FFFF; guard first.
      if (!Number.isFinite(codePoint) || codePoint === 0 || codePoint > 0x10ffff || isSurrogate) {
        return "\uFFFD";
      }
      return String.fromCodePoint(codePoint);
    },
  );
}
/**
 * Remove everything that looks like an HTML tag (`<...>`) and then decode
 * entities in what remains.
 */
function stripTags(value: string): string {
  const withoutMarkup = value.replace(/<[^>]+>/g, "");
  return decodeEntities(withoutMarkup);
}
/**
 * Tidy whitespace in extracted text: drops carriage returns, strips trailing
 * spaces/tabs before newlines, collapses runs of 3+ newlines to a blank line,
 * collapses runs of 2+ spaces/tabs to one space, and trims both ends.
 */
function normalizeWhitespace(value: string): string {
  // Applied strictly in this order; later passes rely on earlier ones.
  const passes: Array<[RegExp, string]> = [
    [/\r/g, ""],
    [/[ \t]+\n/g, "\n"],
    [/\n{3,}/g, "\n\n"],
    [/[ \t]{2,}/g, " "],
  ];
  let result = value;
  for (const [pattern, replacement] of passes) {
    result = result.replace(pattern, replacement);
  }
  return result.trim();
}
/**
 * Pull the contents of the first `<title>` element out of raw HTML.
 *
 * @returns The title with tags stripped and whitespace normalized, or
 *   `undefined` when there is no title or it is empty after cleaning.
 */
function extractTitle(html: string): string | undefined {
  const inner = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html)?.[1];
  if (!inner) return undefined;
  const cleaned = normalizeWhitespace(stripTags(inner));
  return cleaned === "" ? undefined : cleaned;
}
/**
 * Assemble an ExtractResult, leaving the `title` key out entirely when the
 * title is missing or empty.
 */
function buildResult(text: string, title: string | undefined): ExtractResult {
  return title ? { text, title } : { text };
}
/**
 * Assemble an ExtractResultWithExtractor, leaving the `title` key out entirely
 * when the title is missing or empty.
 */
function buildResultWithExtractor(
  text: string,
  title: string | undefined,
  extractor: ExtractorType,
): ExtractResultWithExtractor {
  return title ? { text, title, extractor } : { text, extractor };
}
/**
 * Regex-based HTML to markdown conversion that needs no DOM.
 *
 * Drops script/style/noscript blocks, rewrites anchors to `[label](href)`,
 * headings to `#`-prefixed lines and list items to `- ` bullets, turns
 * `<br>`/`<hr>` and common block-closing tags into newlines, then strips all
 * remaining tags and normalizes whitespace.
 *
 * @param html - Raw HTML.
 * @returns The converted text plus the page title when one is found.
 */
export function htmlToMarkdownSimple(html: string): ExtractResult {
  const title = extractTitle(html);

  // 1. Remove non-content elements wholesale.
  const withoutInert = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, "");

  // 2. Links: fall back to the bare href when the anchor has no visible label.
  const withLinks = withoutInert.replace(
    /<a\s+[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi,
    (_match, href, inner) => {
      const label = normalizeWhitespace(stripTags(inner));
      return label ? `[${label}](${href})` : href;
    },
  );

  // 3. Headings: h1..h6 become one to six leading '#'.
  const withHeadings = withLinks.replace(
    /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi,
    (_match, level, inner) => {
      const depth = Math.max(1, Math.min(6, Number.parseInt(level, 10)));
      const label = normalizeWhitespace(stripTags(inner));
      return `\n${"#".repeat(depth)} ${label}\n`;
    },
  );

  // 4. List items: empty items disappear.
  const withBullets = withHeadings.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (_match, inner) => {
    const label = normalizeWhitespace(stripTags(inner));
    return label ? `\n- ${label}` : "";
  });

  // 5. Line breaks for <br>/<hr> and block-level closing tags.
  const withBreaks = withBullets
    .replace(/<(br|hr)\s*\/?>/gi, "\n")
    .replace(/<\/(p|div|section|article|header|footer|table|tr|ul|ol)>/gi, "\n");

  // 6. Whatever markup is left is dropped.
  const text = normalizeWhitespace(stripTags(withBreaks));
  return buildResult(text, title);
}
/**
 * Reduce markdown to plain text.
 *
 * Removes images, keeps only the label of links, unwraps fenced code blocks
 * and inline code, and strips heading markers and list bullets/numbers before
 * normalizing whitespace.
 */
export function markdownToText(markdown: string): string {
  // Drops the opening fence (with any info string) and the closing fence.
  const unfence = (block: string): string =>
    block.replace(/```[^\n]*\n?/g, "").replace(/```/g, "");

  const plain = markdown
    .replace(/!\[[^\]]*]\([^)]+\)/g, "")
    .replace(/\[([^\]]+)]\([^)]+\)/g, "$1")
    .replace(/```[\s\S]*?```/g, (block) => unfence(block))
    .replace(/`([^`]+)`/g, "$1")
    .replace(/^#{1,6}\s+/gm, "")
    .replace(/^\s*[-*+]\s+/gm, "")
    .replace(/^\s*\d+\.\s+/gm, "");

  return normalizeWhitespace(plain);
}
/**
 * Cap a string at `maxChars` UTF-16 code units.
 *
 * @returns The (possibly shortened) text and whether anything was cut off.
 */
export function truncateText(
  value: string,
  maxChars: number,
): { text: string; truncated: boolean } {
  const fits = value.length <= maxChars;
  const text = fits ? value : value.slice(0, maxChars);
  return { text, truncated: !fits };
}
/**
 * Convert a whole HTML page to markdown with TurndownService.
 *
 * Script, style, meta, link and noscript elements are removed before
 * conversion; the result is whitespace-normalized. The title is taken from the
 * page's `<title>` element when present.
 */
export function convertWithTurndown(html: string): ExtractResult {
  const service = new TurndownService({
    headingStyle: "atx",
    hr: "---",
    bulletListMarker: "-",
    codeBlockStyle: "fenced",
    emDelimiter: "*",
  });
  service.remove(["script", "style", "meta", "link", "noscript"]);

  const markdown = service.turndown(html);
  return buildResult(normalizeWhitespace(markdown), extractTitle(html));
}
/**
 * Extract the main readable content of a page with Mozilla Readability,
 * parsing the HTML through linkedom.
 *
 * Whenever Readability yields nothing usable, or anything in the pipeline
 * throws, the regex-based converter in this module is used instead, so in
 * practice this resolves to a result and does not reject.
 *
 * @param params.html - Raw page HTML.
 * @param params.url - Page URL, applied as the document base URI (best effort).
 * @param params.extractMode - `"markdown"` or `"text"`.
 */
export async function extractWithReadability(params: {
  html: string;
  url: string;
  extractMode: ExtractMode;
}): Promise<ExtractResult | null> {
  const wantsText = params.extractMode === "text";

  // DOM-free fallback used for every failure path below.
  const regexFallback = (): ExtractResult => {
    const simple = htmlToMarkdownSimple(params.html);
    if (!wantsText) return simple;
    const plain = markdownToText(simple.text) || normalizeWhitespace(stripTags(params.html));
    return buildResult(plain, simple.title);
  };

  try {
    // Both libraries are loaded lazily, in parallel.
    const [readabilityModule, linkedomModule] = await Promise.all([
      import("@mozilla/readability"),
      import("linkedom"),
    ]);
    const { Readability } = readabilityModule;
    const { parseHTML } = linkedomModule;

    const { document } = parseHTML(params.html);
    try {
      (document as { baseURI?: string }).baseURI = params.url;
    } catch {
      // Best-effort base URI for relative links.
    }

    const article = new Readability(document, { charThreshold: 0 }).parse();
    if (!article?.content) return regexFallback();

    const articleTitle = article.title || undefined;
    if (wantsText) {
      const plain = normalizeWhitespace(article.textContent ?? "");
      return plain ? buildResult(plain, articleTitle) : regexFallback();
    }

    const converted = htmlToMarkdownSimple(article.content);
    return buildResult(converted.text, articleTitle ?? converted.title);
  } catch {
    return regexFallback();
  }
}
/**
 * Extract content from HTML with the requested extractor.
 *
 * `"turndown"` converts the whole page. Any other value goes through
 * Readability first and falls back to turndown if that yields no result. The
 * returned `extractor` field names the strategy that produced the text.
 */
export async function extractContent(params: {
  html: string;
  url: string;
  extractMode: ExtractMode;
  extractor: ExtractorType;
}): Promise<ExtractResultWithExtractor> {
  // Whole-page conversion, flattened to plain text when requested.
  const viaTurndown = (): ExtractResultWithExtractor => {
    const converted = convertWithTurndown(params.html);
    const body =
      params.extractMode === "text" ? markdownToText(converted.text) : converted.text;
    return buildResultWithExtractor(body, converted.title, "turndown");
  };

  if (params.extractor === "turndown") {
    return viaTurndown();
  }

  // Default: readability
  const readable = await extractWithReadability({
    html: params.html,
    url: params.url,
    extractMode: params.extractMode,
  });
  if (!readable) {
    // Fallback to turndown if readability fails
    return viaTurndown();
  }
  return buildResultWithExtractor(readable.text, readable.title, "readability");
}

View file

@ -0,0 +1,2 @@
// Public entry point for the web tools: re-exports each tool factory together with its result type.
export { createWebFetchTool, type WebFetchResult } from "./web-fetch.js";
export { createWebSearchTool, type WebSearchResult } from "./web-search.js";

View file

@ -0,0 +1,73 @@
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
/** Options controlling how {@link readStringParam} reads a value. */
export type StringParamOptions = {
  /** Throw instead of returning `undefined` when the value is missing. */
  required?: boolean;
  /** Trim surrounding whitespace before validating (default `true`). */
  trim?: boolean;
  /** Name used in the error message (defaults to the key). */
  label?: string;
  /** Accept an empty string as a present value (default `false`). */
  allowEmpty?: boolean;
};

/**
 * Read a string parameter from a loosely typed params object.
 *
 * @param params - Raw parameter bag.
 * @param key - Property to read.
 * @param options - See {@link StringParamOptions}.
 * @returns The (optionally trimmed) string, or `undefined` when the value is
 *   not a string or is empty and `allowEmpty` is off.
 * @throws Error with message `"<label> required"` when `required` is set and
 *   no usable value is present.
 */
export function readStringParam(
  params: Record<string, unknown>,
  key: string,
  options: StringParamOptions & { required: true },
): string;
export function readStringParam(
  params: Record<string, unknown>,
  key: string,
  options?: StringParamOptions,
): string | undefined;
export function readStringParam(
  params: Record<string, unknown>,
  key: string,
  options: StringParamOptions = {},
) {
  const { required = false, trim = true, label = key, allowEmpty = false } = options;

  // Shared "no usable value" outcome: throw when required, else undefined.
  const absent = (): undefined => {
    if (required) throw new Error(`${label} required`);
    return undefined;
  };

  const raw = params[key];
  if (typeof raw !== "string") return absent();

  const text = trim ? raw.trim() : raw;
  return (text || allowEmpty) ? text : absent();
}
/**
 * Read a numeric parameter from a loosely typed params object.
 *
 * Accepts finite numbers directly and strings that `Number.parseFloat` can
 * turn into a finite number (after trimming).
 *
 * @param params - Raw parameter bag.
 * @param key - Property to read.
 * @param options.required - Throw instead of returning `undefined` when no number is found.
 * @param options.label - Name used in the error message (defaults to the key).
 * @param options.integer - Truncate the result toward zero.
 * @returns The parsed number, or `undefined` when none is present.
 * @throws Error with message `"<label> required"` when `required` is set and
 *   no number is present.
 */
export function readNumberParam(
  params: Record<string, unknown>,
  key: string,
  options: { required?: boolean; label?: string; integer?: boolean } = {},
): number | undefined {
  const { required = false, label = key, integer = false } = options;
  const raw = params[key];

  let candidate: number | undefined;
  switch (typeof raw) {
    case "number":
      candidate = raw;
      break;
    case "string": {
      const trimmed = raw.trim();
      candidate = trimmed ? Number.parseFloat(trimmed) : undefined;
      break;
    }
    default:
      break;
  }

  // NaN and +/-Infinity count as "no value", whichever branch produced them.
  if (candidate === undefined || !Number.isFinite(candidate)) {
    if (required) throw new Error(`${label} required`);
    return undefined;
  }
  return integer ? Math.trunc(candidate) : candidate;
}
/**
 * Wrap a payload as a tool result: the payload is pretty-printed as JSON
 * (2-space indent) into a single text content item and also passed through
 * unchanged as `details`.
 */
export function jsonResult(payload: unknown): AgentToolResult<unknown> {
  const text = JSON.stringify(payload, null, 2);
  return {
    content: [{ type: "text", text }],
    details: payload,
  };
}

244
src/agent/tools/web/ssrf.ts Normal file
View file

@ -0,0 +1,244 @@
import { lookup as dnsLookup } from "node:dns/promises";
import { lookup as dnsLookupCb, type LookupAddress } from "node:dns";
import { Agent, type Dispatcher } from "undici";
// Callback shape accepted by the lookup functions in this file: either a single
// address string plus its family, or an array of addresses.
type LookupCallback = (
err: NodeJS.ErrnoException | null,
address: string | LookupAddress[],
family?: number,
) => void;
/**
 * Error thrown when a hostname or address is rejected by the SSRF checks in
 * this file. `name` is set so callers can tell it apart from generic errors.
 */
export class SsrfBlockedError extends Error {
constructor(message: string) {
super(message);
this.name = "SsrfBlockedError";
}
}
// Signature of the promise-based DNS lookup; lets callers inject a replacement.
type LookupFn = typeof dnsLookup;
// Textual prefixes of IPv6 addresses treated as private by isPrivateIpAddress:
// link-local (fe80:), site-local (fec0:) and unique-local (fc.., fd..).
const PRIVATE_IPV6_PREFIXES = ["fe80:", "fec0:", "fc", "fd"];
// Exact hostnames (after normalization) that are always refused.
const BLOCKED_HOSTNAMES = new Set(["localhost", "metadata.google.internal"]);
/**
 * Canonicalize a hostname for comparison: trims, lower-cases, drops a single
 * trailing dot, and unwraps a bracketed IPv6 literal (`[::1]` becomes `::1`).
 */
function normalizeHostname(hostname: string): string {
  const cleaned = hostname.trim().toLowerCase().replace(/\.$/, "");
  const isBracketed = cleaned.startsWith("[") && cleaned.endsWith("]");
  return isBracketed ? cleaned.slice(1, -1) : cleaned;
}
/**
 * Parse a dotted-quad IPv4 string into its four octets.
 *
 * Each part is read with `Number.parseInt(part, 10)`, so trailing non-digit
 * characters within a part are tolerated.
 *
 * @returns The four octets, or `null` when there are not exactly four parts or
 *   any part is not a number in 0..255.
 */
function parseIpv4(address: string): number[] | null {
  const pieces = address.split(".");
  if (pieces.length !== 4) return null;

  const octets: number[] = [];
  for (const piece of pieces) {
    const octet = Number.parseInt(piece, 10);
    if (Number.isNaN(octet) || octet < 0 || octet > 255) return null;
    octets.push(octet);
  }
  return octets;
}
/**
 * Recover the IPv4 address embedded in the tail of an IPv4-mapped IPv6
 * address (the part after `::ffff:`).
 *
 * Accepts three spellings of the tail: dotted quad (`127.0.0.1`), a single hex
 * group holding all 32 bits, or two 16-bit hex groups (`7f00:1`).
 *
 * @returns The four octets, or `null` when the tail matches none of these.
 */
function parseIpv4FromMappedIpv6(mapped: string): number[] | null {
  // Split a 32-bit value into big-endian octets.
  const toOctets = (word: number): number[] => [
    (word >>> 24) & 0xff,
    (word >>> 16) & 0xff,
    (word >>> 8) & 0xff,
    word & 0xff,
  ];
  const withinRange = (n: number, max: number): boolean =>
    !Number.isNaN(n) && n >= 0 && n <= max;

  if (mapped.includes(".")) {
    return parseIpv4(mapped);
  }

  const groups = mapped.split(":").filter(Boolean);
  const [first, second] = groups;

  if (groups.length === 1) {
    if (!first) return null;
    const whole = Number.parseInt(first, 16);
    return withinRange(whole, 0xffff_ffff) ? toOctets(whole) : null;
  }

  if (groups.length !== 2 || !first || !second) return null;
  const high = Number.parseInt(first, 16);
  const low = Number.parseInt(second, 16);
  if (!withinRange(high, 0xffff) || !withinRange(low, 0xffff)) return null;
  return toOctets((high << 16) + low);
}
/**
 * Decide whether IPv4 octets fall in a range that must not be fetched.
 *
 * Blocked: 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8 (loopback), 169.254.0.0/16
 * (link-local), 172.16.0.0/12, 192.168.0.0/16 and 100.64.0.0/10.
 *
 * @param parts - Octets; only the first two are inspected.
 * @returns `true` for a blocked range, `false` otherwise (including when
 *   fewer than two octets are supplied).
 */
function isPrivateIpv4(parts: number[]): boolean {
  const [first, second] = parts;
  if (first === undefined || second === undefined) return false;

  switch (first) {
    case 0:
    case 10:
    case 127:
      return true;
    case 169:
      return second === 254;
    case 172:
      return second >= 16 && second <= 31;
    case 192:
      return second === 168;
    case 100:
      return second >= 64 && second <= 127;
    default:
      return false;
  }
}
/**
 * Decide whether a textual IP address is private, loopback, link-local or
 * otherwise internal and must therefore be refused.
 *
 * Handles IPv4 dotted quads, IPv4-mapped IPv6 (`::ffff:...`), the IPv6
 * unspecified and loopback addresses, unique-local addresses, and the full
 * link-local (`fe80::/10`) and site-local (`fec0::/10`) ranges. The two /10
 * ranges are checked on the top ten bits of the first group, so addresses
 * such as `fe90::1` or `febf::1` are covered, not just those spelled `fe80:`.
 *
 * @param address - Address text, optionally wrapped in `[...]`.
 * @returns `true` when the address is internal; `false` for public addresses
 *   and for input that is not recognizable as an IP address.
 */
export function isPrivateIpAddress(address: string): boolean {
  let normalized = address.trim().toLowerCase();
  if (normalized.startsWith("[") && normalized.endsWith("]")) {
    normalized = normalized.slice(1, -1);
  }
  if (!normalized) return false;

  if (normalized.startsWith("::ffff:")) {
    const mapped = normalized.slice("::ffff:".length);
    const ipv4 = parseIpv4FromMappedIpv6(mapped);
    if (ipv4) return isPrivateIpv4(ipv4);
    // An unparseable mapped tail falls through to the generic IPv6 checks.
  }

  if (normalized.includes(":")) {
    if (normalized === "::" || normalized === "::1") return true;
    const candidate = normalized;
    if (PRIVATE_IPV6_PREFIXES.some((prefix) => candidate.startsWith(prefix))) return true;
    // The string prefixes only match a first group spelled exactly "fe80" or
    // "fec0"; compare the top 10 bits so the whole of each /10 is covered.
    const firstGroup = candidate.split(":")[0] ?? "";
    if (!/^[0-9a-f]{1,4}$/.test(firstGroup)) return false;
    const topTenBits = Number.parseInt(firstGroup, 16) & 0xffc0;
    return topTenBits === 0xfe80 || topTenBits === 0xfec0;
  }

  const ipv4 = parseIpv4(normalized);
  if (!ipv4) return false;
  return isPrivateIpv4(ipv4);
}
/**
 * Decide whether a hostname is refused by name alone, before any DNS lookup.
 *
 * Blocks the exact names in `BLOCKED_HOSTNAMES` and anything ending in
 * `.localhost`, `.local` or `.internal`. An empty hostname is not blocked
 * here.
 */
export function isBlockedHostname(hostname: string): boolean {
  const host = normalizeHostname(hostname);
  if (host === "") return false;
  if (BLOCKED_HOSTNAMES.has(host)) return true;

  const blockedSuffixes = [".localhost", ".local", ".internal"];
  return blockedSuffixes.some((suffix) => host.endsWith(suffix));
}
/**
 * Build a callback-style DNS lookup function that answers queries for one
 * specific hostname from a fixed list of addresses and delegates every other
 * hostname to a fallback lookup.
 *
 * @param params.hostname - Hostname to pin (compared after normalization).
 * @param params.addresses - Addresses to answer with; family is inferred from
 *   the presence of ":" (6) or its absence (4).
 * @param params.fallback - Lookup used for non-matching hostnames; defaults to
 *   node:dns `lookup`.
 * @returns A function with the same call shape as node:dns `lookup`.
 */
export function createPinnedLookup(params: {
hostname: string;
addresses: string[];
fallback?: typeof dnsLookupCb;
}): typeof dnsLookupCb {
const normalizedHost = normalizeHostname(params.hostname);
const fallback = params.fallback ?? dnsLookupCb;
// Two narrowed views of the fallback, one per call shape used below.
const fallbackLookup = fallback as unknown as (
hostname: string,
callback: LookupCallback,
) => void;
const fallbackWithOptions = fallback as unknown as (
hostname: string,
options: unknown,
callback: LookupCallback,
) => void;
const records = params.addresses.map((address) => ({
address,
family: address.includes(":") ? 6 : 4,
}));
// Rotates through the usable records across successive single-address lookups.
let index = 0;
return ((host: string, options?: unknown, callback?: unknown) => {
// The callback is the second argument when options are omitted.
const cb: LookupCallback =
typeof options === "function" ? (options as LookupCallback) : (callback as LookupCallback);
if (!cb) return;
const normalized = normalizeHostname(host);
// Anything other than the pinned hostname goes to the fallback lookup.
if (!normalized || normalized !== normalizedHost) {
if (typeof options === "function" || options === undefined) {
return fallbackLookup(host, cb);
}
return fallbackWithOptions(host, options, cb);
}
const opts =
typeof options === "object" && options !== null
? (options as { all?: boolean; family?: number })
: {};
// The family may be passed as a bare number or as options.family; 0 means "any".
const requestedFamily =
typeof options === "number" ? options : typeof opts.family === "number" ? opts.family : 0;
const candidates =
requestedFamily === 4 || requestedFamily === 6
? records.filter((entry) => entry.family === requestedFamily)
: records;
// If no pinned record matches the requested family, answer with all records instead.
const usable = candidates.length > 0 ? candidates : records;
if (opts.all) {
cb(null, usable as LookupAddress[]);
return;
}
const chosen = usable[index % usable.length];
index += 1;
// NOTE(review): with an empty `addresses` list `chosen` is undefined and the
// callback is never invoked — confirm callers always pass at least one address.
if (chosen) {
cb(null, chosen.address, chosen.family);
}
}) as typeof dnsLookupCb;
}
// A hostname together with the addresses it was resolved to and a lookup
// function that answers for that hostname from those addresses.
export type PinnedHostname = {
// Normalized hostname.
hostname: string;
// De-duplicated resolved addresses.
addresses: string[];
// Lookup function suitable for passing as a connection `lookup` option.
lookup: typeof dnsLookupCb;
};
/**
 * Validate and resolve a hostname, pinning the result for later connections.
 *
 * The name is rejected up front if it is blocked by name or is itself a
 * private IP literal. It is then resolved, and the whole resolution is
 * rejected if any returned address is private.
 *
 * @param hostname - Hostname (or IP literal) to resolve.
 * @param lookupFn - Promise-based lookup; defaults to node:dns/promises `lookup`.
 * @returns The normalized hostname, its unique addresses, and a pinned lookup.
 * @throws SsrfBlockedError when the hostname or any resolved address is blocked.
 * @throws Error when the hostname is empty or resolves to nothing.
 */
export async function resolvePinnedHostname(
  hostname: string,
  lookupFn: LookupFn = dnsLookup,
): Promise<PinnedHostname> {
  const host = normalizeHostname(hostname);
  if (!host) {
    throw new Error("Invalid hostname");
  }
  if (isBlockedHostname(host)) {
    throw new SsrfBlockedError(`Blocked hostname: ${hostname}`);
  }
  if (isPrivateIpAddress(host)) {
    throw new SsrfBlockedError("Blocked: private/internal IP address");
  }

  const resolved = await lookupFn(host, { all: true });
  if (resolved.length === 0) {
    throw new Error(`Unable to resolve hostname: ${hostname}`);
  }
  // One private record is enough to refuse the whole hostname.
  if (resolved.some((record) => isPrivateIpAddress(record.address))) {
    throw new SsrfBlockedError("Blocked: resolves to private/internal IP address");
  }

  const unique = new Set(resolved.map((record) => record.address));
  const addresses = Array.from(unique);
  if (addresses.length === 0) {
    throw new Error(`Unable to resolve hostname: ${hostname}`);
  }

  const lookup = createPinnedLookup({ hostname: host, addresses });
  return { hostname: host, addresses, lookup };
}
/**
 * Create an undici Agent whose connections resolve hostnames through the
 * pinned lookup function.
 */
export function createPinnedDispatcher(pinned: PinnedHostname): Dispatcher {
  const { lookup } = pinned;
  return new Agent({ connect: { lookup } });
}
/**
 * Shut a dispatcher down on a best-effort basis.
 *
 * Prefers `close()` (awaited) and falls back to `destroy()` when `close` is
 * not available. Errors during cleanup are ignored, and a missing dispatcher
 * is a no-op.
 */
export async function closeDispatcher(dispatcher?: Dispatcher | null): Promise<void> {
  if (!dispatcher) return;
  const target = dispatcher as { close?: () => Promise<void> | void; destroy?: () => void };
  try {
    if (typeof target.close === "function") {
      await target.close();
    } else if (typeof target.destroy === "function") {
      target.destroy();
    }
  } catch {
    // ignore dispatcher cleanup errors
  }
}

View file

@ -0,0 +1,335 @@
import { Type } from "@sinclair/typebox";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import type { Dispatcher } from "undici";
import {
closeDispatcher,
createPinnedDispatcher,
resolvePinnedHostname,
SsrfBlockedError,
} from "./ssrf.js";
import {
DEFAULT_CACHE_TTL_MINUTES,
DEFAULT_TIMEOUT_SECONDS,
normalizeCacheKey,
readCache,
readResponseText,
resolveCacheTtlMs,
resolveTimeoutSeconds,
withTimeout,
writeCache,
} from "./cache.js";
import type { CacheEntry } from "./cache.js";
import { extractContent, markdownToText, truncateText, type ExtractMode, type ExtractorType } from "./html-utils.js";
import { jsonResult, readNumberParam, readStringParam } from "./param-helpers.js";
// Accepted values for the `extractMode` and `extractor` tool parameters.
const EXTRACT_MODES = ["markdown", "text"] as const;
const EXTRACTOR_TYPES = ["readability", "turndown"] as const;
// Default cap on returned characters (matches the schema description of `maxChars`).
const DEFAULT_FETCH_MAX_CHARS = 50_000;
// Presumably the default redirect limit for fetchWithRedirects — confirm at the call site.
const DEFAULT_FETCH_MAX_REDIRECTS = 3;
// Presumably the cap applied to error bodies — usage is outside this view, confirm.
const DEFAULT_ERROR_MAX_CHARS = 4_000;
// Browser-like User-Agent string; presumably sent with outgoing requests — confirm at the call site.
const DEFAULT_FETCH_USER_AGENT =
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";
// Module-level cache of fetch results; shared by everything that imports this module.
const FETCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
// TypeBox schema describing the arguments of the `web_fetch` tool.
// `extractMode` and `extractor` are declared as free-form strings; the tool's
// execute handler maps any unrecognised value to the default instead of rejecting it.
const WebFetchSchema = Type.Object({
url: Type.String({ description: "HTTP or HTTPS URL to fetch." }),
extractMode: Type.Optional(
Type.String({
description: 'Output format: "markdown" (default) or "text" (plain text).',
}),
),
extractor: Type.Optional(
Type.String({
description:
'Extraction method: "readability" (default, smart extraction of main content) or "turndown" (convert entire page).',
}),
),
// The schema minimum of 100 mirrors the lower clamp applied by resolveMaxChars.
maxChars: Type.Optional(
Type.Number({
description: "Maximum characters to return (truncates when exceeded). Default: 50000.",
minimum: 100,
}),
),
});
// Raw argument shape of the `web_fetch` tool as received by execute(), before
// the optional fields are normalised to their defaults.
type WebFetchArgs = {
url: string;
extractMode?: string;
extractor?: string;
maxChars?: number;
};
/** Payload produced by a successful web fetch (also the shape stored in the fetch cache). */
export type WebFetchResult = {
// URL exactly as requested by the caller.
url: string;
// URL that produced the response after redirects were followed.
finalUrl: string;
// HTTP status code of the final response.
status: number;
// Content-Type header of the final response, or "application/octet-stream" when absent.
contentType: string;
// Page title; only set when content extraction returned one.
title?: string;
extractMode: ExtractMode;
// Which path produced `text`: an HTML extractor, pretty-printed "json", or the "raw" body.
extractor: ExtractorType | "raw" | "json";
// Whether `text` was cut to fit the maxChars limit.
truncated: boolean;
// Length of `text` after truncation.
length: number;
// ISO-8601 timestamp taken when the payload was built.
fetchedAt: string;
// Elapsed milliseconds measured from just before the request started.
tookMs: number;
text: string;
// Set to true when the payload was served from the cache.
cached?: boolean;
};
/**
 * Normalises a caller-supplied character limit.
 *
 * @param value - Requested limit; anything that is not a finite number is ignored.
 * @param fallback - Limit to use when `value` is unusable.
 * @returns The limit rounded down to an integer and raised to at least 100.
 */
function resolveMaxChars(value: unknown, fallback: number): number {
  let limit = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    limit = value;
  }
  const whole = Math.floor(limit);
  return Math.max(100, whole);
}
/**
 * Normalises a redirect limit.
 *
 * @param value - Requested limit; anything that is not a finite number is ignored.
 * @param fallback - Limit to use when `value` is unusable.
 * @returns The limit rounded down to an integer, never below 0.
 */
function resolveMaxRedirects(value: unknown, fallback: number): number {
  let limit = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    limit = value;
  }
  const whole = Math.floor(limit);
  return Math.max(0, whole);
}
/**
 * Cheap sniff for HTML documents: after skipping leading whitespace, checks
 * (case-insensitively) whether the text opens with a doctype or an `<html` tag.
 * Only the first 256 characters are inspected.
 */
function looksLikeHtml(value: string): boolean {
  const prefix = value.trimStart().slice(0, 256).toLowerCase();
  if (prefix.length === 0) {
    return false;
  }
  const openers = ["<!doctype html", "<html"];
  return openers.some((opener) => prefix.startsWith(opener));
}
/** Returns true for the HTTP status codes that this fetcher follows as redirects. */
function isRedirectStatus(status: number): boolean {
  switch (status) {
    case 301:
    case 302:
    case 303:
    case 307:
    case 308:
      return true;
    default:
      return false;
  }
}
/**
 * Performs a GET request and follows redirects manually, so that every hop is
 * validated and pinned before a connection is made.
 *
 * For each hop the URL must parse and use http/https, its hostname is resolved
 * through `resolvePinnedHostname`, and the request is sent through a dispatcher
 * built from that pinned resolution. A single timeout signal covers the whole
 * redirect chain.
 *
 * Dispatcher ownership: when a non-redirect response is returned, the
 * accompanying `dispatcher` is handed to the caller, who must close it once the
 * body has been read. On every other path (redirect hop or any error) the
 * dispatcher created for that hop is closed here before continuing or throwing.
 *
 * @param params.url - Starting URL.
 * @param params.maxRedirects - Maximum number of redirects to follow.
 * @param params.timeoutSeconds - Overall timeout, converted to milliseconds for the signal.
 * @param params.userAgent - Value for the User-Agent request header.
 * @returns The final response, the URL that produced it and the still-open dispatcher.
 * @throws Error when a URL is invalid or not http/https, a redirect lacks a
 *   Location header, the redirect limit is exceeded, or a redirect loop is
 *   detected. Errors from hostname resolution and from `fetch` propagate unchanged.
 */
async function fetchWithRedirects(params: {
  url: string;
  maxRedirects: number;
  timeoutSeconds: number;
  userAgent: string;
}): Promise<{ response: Response; finalUrl: string; dispatcher: Dispatcher }> {
  const signal = withTimeout(undefined, params.timeoutSeconds * 1000);
  const visited = new Set<string>();
  let currentUrl = params.url;
  let redirectCount = 0;
  while (true) {
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(currentUrl);
    } catch {
      throw new Error("Invalid URL: must be http or https");
    }
    if (!["http:", "https:"].includes(parsedUrl.protocol)) {
      throw new Error("Invalid URL: must be http or https");
    }
    // Record every URL that is actually requested, including the starting one,
    // so a redirect back to any earlier hop is recognised as a loop.
    visited.add(parsedUrl.toString());
    const pinned = await resolvePinnedHostname(parsedUrl.hostname);
    const dispatcher = createPinnedDispatcher(pinned);
    let res: Response | undefined;
    // Becomes true only when the dispatcher is returned to the caller.
    let handedOff = false;
    try {
      // Use undici's dispatcher for SSRF protection
      res = await fetch(parsedUrl.toString(), {
        method: "GET",
        headers: {
          Accept: "*/*",
          "User-Agent": params.userAgent,
          "Accept-Language": "en-US,en;q=0.9",
        },
        signal,
        redirect: "manual",
        dispatcher,
      } as unknown as RequestInit);
      if (!isRedirectStatus(res.status)) {
        handedOff = true;
        return { response: res, finalUrl: currentUrl, dispatcher };
      }
      const location = res.headers.get("location");
      if (!location) {
        throw new Error(`Redirect missing location header (${res.status})`);
      }
      redirectCount += 1;
      if (redirectCount > params.maxRedirects) {
        throw new Error(`Too many redirects (limit: ${params.maxRedirects})`);
      }
      // `new URL` throws on a malformed Location value; the `finally` below
      // still releases this hop's dispatcher in that case.
      const nextUrl = new URL(location, parsedUrl).toString();
      if (visited.has(nextUrl)) {
        throw new Error("Redirect loop detected");
      }
      currentUrl = nextUrl;
    } finally {
      if (!handedOff) {
        // Drop any unread body before closing, so the close does not have to
        // wait on a response nobody will consume.
        void res?.body?.cancel().catch(() => undefined);
        await closeDispatcher(dispatcher);
      }
    }
  }
}
/**
 * Turns the body of a failed response into a short excerpt suitable for an
 * error message.
 *
 * When the body is HTML (declared via content type, or sniffed from the body
 * itself) it is first passed through `markdownToText`; the result is trimmed
 * and truncated to `maxChars`.
 *
 * @returns The excerpt, or an empty string when `detail` is empty.
 */
function formatWebFetchErrorDetail(params: {
  detail: string;
  contentType?: string | null;
  maxChars: number;
}): string {
  if (!params.detail) {
    return "";
  }
  const declaredType = (params.contentType ?? "").toLowerCase();
  const isHtml = declaredType.includes("text/html") || looksLikeHtml(params.detail);
  const readable = isHtml ? markdownToText(params.detail) : params.detail;
  return truncateText(readable.trim(), params.maxChars).text;
}
/**
 * Fetches a URL and converts the response into a `WebFetchResult`.
 *
 * Flow: return a cached payload when one exists for the same
 * url/extractMode/extractor/maxChars combination; otherwise validate the URL,
 * fetch it (following redirects), turn non-2xx responses into an error that
 * carries a short body excerpt, extract readable content from HTML,
 * pretty-print JSON, pass any other body through unchanged, truncate to
 * `maxChars`, cache the payload and return it.
 *
 * @param params.url - URL to fetch; must be http or https.
 * @param params.extractMode - Output format forwarded to the HTML extractor.
 * @param params.extractor - HTML extraction strategy.
 * @param params.maxChars - Maximum length of the returned text.
 * @param params.maxRedirects - Redirect limit for the request.
 * @param params.timeoutSeconds - Timeout for the request.
 * @param params.cacheTtlMs - Time-to-live for the cached payload.
 * @param params.userAgent - User-Agent header value.
 * @throws Error for invalid URLs, for non-OK HTTP responses, and for anything
 *   raised by the underlying fetch.
 */
async function runWebFetch(params: {
  url: string;
  extractMode: ExtractMode;
  extractor: ExtractorType;
  maxChars: number;
  maxRedirects: number;
  timeoutSeconds: number;
  cacheTtlMs: number;
  userAgent: string;
}): Promise<WebFetchResult> {
  const cacheKey = normalizeCacheKey(
    `fetch:${params.url}:${params.extractMode}:${params.extractor}:${params.maxChars}`,
  );
  const cached = readCache(FETCH_CACHE, cacheKey);
  if (cached) return { ...cached.value, cached: true } as WebFetchResult;

  // Fail fast on obviously invalid input before any network work starts.
  let parsedUrl: URL;
  try {
    parsedUrl = new URL(params.url);
  } catch {
    throw new Error("Invalid URL: must be http or https");
  }
  if (!["http:", "https:"].includes(parsedUrl.protocol)) {
    throw new Error("Invalid URL: must be http or https");
  }

  const start = Date.now();
  const {
    response: res,
    finalUrl,
    dispatcher,
  } = await fetchWithRedirects({
    url: params.url,
    maxRedirects: params.maxRedirects,
    timeoutSeconds: params.timeoutSeconds,
    userAgent: params.userAgent,
  });
  // From here on this function owns the dispatcher; `finally` releases it
  // after the body has been consumed (or on any failure).
  try {
    if (!res.ok) {
      const rawDetail = await readResponseText(res);
      const detail = formatWebFetchErrorDetail({
        detail: rawDetail,
        contentType: res.headers.get("content-type"),
        maxChars: DEFAULT_ERROR_MAX_CHARS,
      });
      throw new Error(`Web fetch failed (${res.status}): ${detail || res.statusText}`);
    }
    const contentType = res.headers.get("content-type") ?? "application/octet-stream";
    // Media types are case-insensitive, so compare against a lowercased copy
    // while reporting the header value as the server sent it.
    const mediaType = contentType.toLowerCase();
    const body = await readResponseText(res);
    let title: string | undefined;
    let extractor: ExtractorType | "raw" | "json" = "raw";
    let text = body;
    if (mediaType.includes("text/html")) {
      const extracted = await extractContent({
        html: body,
        url: finalUrl,
        extractMode: params.extractMode,
        extractor: params.extractor,
      });
      text = extracted.text;
      title = extracted.title;
      extractor = extracted.extractor;
    } else if (mediaType.includes("application/json")) {
      try {
        text = JSON.stringify(JSON.parse(body), null, 2);
        extractor = "json";
      } catch {
        // Body claimed to be JSON but does not parse: return it untouched.
        text = body;
        extractor = "raw";
      }
    }
    const truncated = truncateText(text, params.maxChars);
    const payload: WebFetchResult = {
      url: params.url,
      finalUrl,
      status: res.status,
      contentType,
      extractMode: params.extractMode,
      extractor,
      truncated: truncated.truncated,
      length: truncated.text.length,
      fetchedAt: new Date().toISOString(),
      tookMs: Date.now() - start,
      text: truncated.text,
    };
    if (title) {
      payload.title = title;
    }
    writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
    return payload;
  } finally {
    await closeDispatcher(dispatcher);
  }
}
/**
 * Builds the `web_fetch` agent tool.
 *
 * The handler normalises its arguments (unknown `extractMode` / `extractor`
 * values fall back to "markdown" / "readability"), runs the fetch with the
 * module defaults for redirects, timeout, cache TTL and user agent, and wraps
 * the outcome with `jsonResult`. Failures are reported as a JSON object with
 * `error` set to "ssrf_blocked" or "fetch_failed" rather than being thrown.
 */
export function createWebFetchTool(): AgentTool<typeof WebFetchSchema, unknown> {
  return {
    name: "web_fetch",
    label: "Web Fetch",
    description:
      'Fetch and extract readable content from a URL. Converts HTML to markdown or plain text. Use extractor="readability" for smart article extraction, or "turndown" for full page conversion.',
    parameters: WebFetchSchema,
    execute: async (_toolCallId, args) => {
      const params = args as WebFetchArgs;
      const raw = params as Record<string, unknown>;

      const url = readStringParam(raw, "url", { required: true });
      const requestedMode = readStringParam(raw, "extractMode");
      const requestedExtractor = readStringParam(raw, "extractor");
      const requestedMaxChars = readNumberParam(raw, "maxChars", { integer: true });

      let extractMode: ExtractMode = "markdown";
      if (requestedMode === "text") {
        extractMode = "text";
      }
      let extractor: ExtractorType = "readability";
      if (requestedExtractor === "turndown") {
        extractor = "turndown";
      }

      try {
        const fetched = await runWebFetch({
          url,
          extractMode,
          extractor,
          maxChars: resolveMaxChars(requestedMaxChars, DEFAULT_FETCH_MAX_CHARS),
          maxRedirects: DEFAULT_FETCH_MAX_REDIRECTS,
          timeoutSeconds: DEFAULT_TIMEOUT_SECONDS,
          cacheTtlMs: resolveCacheTtlMs(DEFAULT_CACHE_TTL_MINUTES, DEFAULT_CACHE_TTL_MINUTES),
          userAgent: DEFAULT_FETCH_USER_AGENT,
        });
        return jsonResult(fetched);
      } catch (error) {
        // Blocked destinations get their own error code so callers can tell
        // policy rejections apart from ordinary network failures.
        if (error instanceof SsrfBlockedError) {
          return jsonResult({
            error: "ssrf_blocked",
            message: error.message,
          });
        }
        const message = error instanceof Error ? error.message : String(error);
        return jsonResult({
          error: "fetch_failed",
          message,
        });
      }
    },
  };
}

View file

@ -0,0 +1,451 @@
import { Type } from "@sinclair/typebox";
import type { AgentTool } from "@mariozechner/pi-agent-core";
import {
DEFAULT_CACHE_TTL_MINUTES,
DEFAULT_TIMEOUT_SECONDS,
normalizeCacheKey,
readCache,
readResponseText,
resolveCacheTtlMs,
resolveTimeoutSeconds,
withTimeout,
writeCache,
} from "./cache.js";
import type { CacheEntry } from "./cache.js";
import { jsonResult, readNumberParam, readStringParam } from "./param-helpers.js";
// Supported search back-ends; the SearchProvider union is derived from this tuple.
const SEARCH_PROVIDERS = ["brave", "perplexity"] as const;
type SearchProvider = (typeof SEARCH_PROVIDERS)[number];
// Result count used when the caller does not pass `count`.
const DEFAULT_SEARCH_COUNT = 5;
// Upper bound for `count`, enforced both by the schema and by resolveSearchCount.
const MAX_SEARCH_COUNT = 10;
const BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search";
// Despite the name, this is the OpenRouter endpoint: it is used for every key
// that is not recognised as a direct Perplexity key (see inferPerplexityBaseUrl).
const DEFAULT_PERPLEXITY_BASE_URL = "https://openrouter.ai/api/v1";
// Endpoint used when the API key carries a Perplexity key prefix.
const PERPLEXITY_DIRECT_BASE_URL = "https://api.perplexity.ai";
// Model id sent with every Perplexity-provider request.
const DEFAULT_PERPLEXITY_MODEL = "perplexity/sonar-pro";
// Key prefixes (compared against the lowercased key) used to pick the base URL.
const PERPLEXITY_KEY_PREFIXES = ["pplx-"];
const OPENROUTER_KEY_PREFIXES = ["sk-or-"];
// Module-level cache of search payloads, keyed by the normalized cache key built in runWebSearch.
const SEARCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
// Relative freshness codes accepted as-is (after lowercasing).
const BRAVE_FRESHNESS_SHORTCUTS = new Set(["pd", "pw", "pm", "py"]);
// Explicit date range of the form YYYY-MM-DDtoYYYY-MM-DD; captures start and end dates.
const BRAVE_FRESHNESS_RANGE = /^(\d{4}-\d{2}-\d{2})to(\d{4}-\d{2}-\d{2})$/;
// TypeBox schema describing the arguments of the `web_search` tool.
const WebSearchSchema = Type.Object({
query: Type.String({ description: "Search query string." }),
// Free-form string: any value other than "brave"/"perplexity" triggers
// auto-detection from the available API keys (see resolveProvider).
provider: Type.Optional(
Type.String({
description:
'Search provider: "brave" (default, traditional search results) or "perplexity" (AI-synthesized answers).',
}),
),
count: Type.Optional(
Type.Number({
description: "Number of results to return (1-10). Default: 5. Brave only.",
minimum: 1,
maximum: MAX_SEARCH_COUNT,
}),
),
// NOTE(review): the description advertises a default of 'US', but the visible
// code only sends `country` to Brave when the caller supplies one — confirm
// which default actually applies.
country: Type.Optional(
Type.String({
description:
"2-letter country code for region-specific results (e.g., 'DE', 'US'). Default: 'US'.",
}),
),
// Validated by normalizeFreshness; rejected outright for non-Brave providers.
freshness: Type.Optional(
Type.String({
description:
"Filter results by time (Brave only): 'pd' (past day), 'pw' (past week), 'pm' (past month), 'py' (past year), or 'YYYY-MM-DDtoYYYY-MM-DD'.",
}),
),
});
// Raw argument shape of the `web_search` tool as received by execute(), before
// provider detection, count clamping and freshness validation.
type WebSearchArgs = {
query: string;
provider?: string;
count?: number;
country?: string;
freshness?: string;
};
// The subset of a Brave web result that this module reads. Every field is
// optional because the response is not validated before use.
type BraveSearchResult = {
title?: string;
url?: string;
description?: string;
// Surfaced to callers as `published` in the mapped result.
age?: string;
};
// The subset of the Brave Search response body that this module reads: only `web.results`.
type BraveSearchResponse = {
web?: {
results?: BraveSearchResult[];
};
};
// The subset of the chat-completions response that this module reads: the
// first choice's message content and the top-level citations list.
type PerplexitySearchResponse = {
choices?: Array<{
message?: {
content?: string;
};
}>;
citations?: string[];
};
/**
 * Shape of a successful search payload: fields common to both providers,
 * intersected with the provider-specific part.
 *
 * NOTE(review): runWebSearch is typed as returning Record<string, unknown>,
 * so this type is not enforced on the payloads built in this file — confirm
 * whether external callers rely on it.
 */
export type WebSearchResult = {
query: string;
provider: SearchProvider;
// Elapsed milliseconds for the provider call.
tookMs: number;
// Set to true when the payload was served from the cache.
cached?: boolean;
} & (
| {
// Brave result: a list of hits
count: number;
results: Array<{
title: string;
url: string;
description: string;
// Taken from Brave's `age` field when present.
published?: string;
// Hostname of `url` when it parses as a URL.
siteName?: string;
}>;
}
| {
// Perplexity result: a synthesized answer plus its citations
model: string;
content: string;
citations: string[];
}
);
/**
 * Normalises the requested number of results.
 *
 * @param value - Requested count; anything that is not a finite number is ignored.
 * @param fallback - Count to use when `value` is unusable.
 * @returns The count rounded down and limited to the range 1..MAX_SEARCH_COUNT.
 */
function resolveSearchCount(value: unknown, fallback: number): number {
  let requested = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    requested = value;
  }
  const whole = Math.floor(requested);
  const capped = Math.min(MAX_SEARCH_COUNT, whole);
  return Math.max(1, capped);
}
/**
 * Validates and canonicalises a Brave `freshness` value.
 *
 * Accepts the shortcut codes in BRAVE_FRESHNESS_SHORTCUTS (case-insensitive,
 * returned lowercased) or an explicit `YYYY-MM-DDtoYYYY-MM-DD` range whose
 * dates are both real calendar dates with start not after end.
 *
 * @returns The normalised value, or `undefined` when the input is empty or invalid.
 */
function normalizeFreshness(value: string | undefined): string | undefined {
  const trimmed = value?.trim();
  if (!trimmed) {
    return undefined;
  }

  const shortcut = trimmed.toLowerCase();
  if (BRAVE_FRESHNESS_SHORTCUTS.has(shortcut)) {
    return shortcut;
  }

  const [, start, end] = BRAVE_FRESHNESS_RANGE.exec(trimmed) ?? [];
  if (!start || !end) {
    return undefined;
  }
  const bothReal = isValidIsoDate(start) && isValidIsoDate(end);
  // ISO dates of equal length order correctly under plain string comparison.
  if (!bothReal || start > end) {
    return undefined;
  }
  return `${start}to${end}`;
}
/**
 * Checks that `value` is a `YYYY-MM-DD` string naming a real calendar date.
 *
 * The date is rebuilt in UTC and compared field by field, which rejects
 * values such as month 13 or February 30 that `Date` would otherwise roll over.
 */
function isValidIsoDate(value: string): boolean {
  const hasIsoShape = /^\d{4}-\d{2}-\d{2}$/.test(value);
  if (!hasIsoShape) {
    return false;
  }

  const [year, month, day] = value.split("-").map((part) => Number.parseInt(part, 10));
  if (year === undefined || month === undefined || day === undefined) {
    return false;
  }
  if (![year, month, day].every((field) => Number.isFinite(field))) {
    return false;
  }

  const rebuilt = new Date(Date.UTC(year, month - 1, day));
  const sameYear = rebuilt.getUTCFullYear() === year;
  const sameMonth = rebuilt.getUTCMonth() === month - 1;
  const sameDay = rebuilt.getUTCDate() === day;
  return sameYear && sameMonth && sameDay;
}
/**
 * Derives a display site name from a result URL.
 *
 * @returns The URL's hostname, or `undefined` when the URL is missing, empty
 *   or cannot be parsed.
 */
function resolveSiteName(url: string | undefined): string | undefined {
  let host: string | undefined;
  if (url) {
    try {
      host = new URL(url).hostname;
    } catch {
      host = undefined;
    }
  }
  return host;
}
/**
 * Picks the API base URL from the shape of the key (compared case-insensitively):
 * keys with a Perplexity prefix go to the direct Perplexity endpoint, keys
 * with an OpenRouter prefix — and any unrecognised key — go to the default
 * (OpenRouter) endpoint.
 */
function inferPerplexityBaseUrl(apiKey: string): string {
  const key = apiKey.toLowerCase();
  const startsWithAny = (prefixes: readonly string[]): boolean =>
    prefixes.some((prefix) => key.startsWith(prefix));

  if (startsWithAny(PERPLEXITY_KEY_PREFIXES)) {
    return PERPLEXITY_DIRECT_BASE_URL;
  }
  if (startsWithAny(OPENROUTER_KEY_PREFIXES)) {
    return DEFAULT_PERPLEXITY_BASE_URL;
  }
  // Unknown key formats share the default endpoint.
  return DEFAULT_PERPLEXITY_BASE_URL;
}
/**
 * Looks up the API key for the Perplexity provider from the environment.
 *
 * PERPLEXITY_API_KEY takes precedence over OPENROUTER_API_KEY; values are
 * trimmed and blank values are treated as unset.
 *
 * @returns The key together with the name of the variable it came from, or
 *   `{ apiKey: null, source: "none" }` when neither variable is set.
 */
function resolvePerplexityApiKey(): { apiKey: string; source: string } | { apiKey: null; source: "none" } {
  const variables = ["PERPLEXITY_API_KEY", "OPENROUTER_API_KEY"] as const;
  for (const source of variables) {
    const apiKey = (process.env[source] ?? "").trim();
    if (apiKey) {
      return { apiKey, source };
    }
  }
  return { apiKey: null, source: "none" };
}
/** Reads BRAVE_API_KEY from the environment, trimmed; blank or unset yields `undefined`. */
function resolveBraveApiKey(): string | undefined {
  const key = process.env.BRAVE_API_KEY?.trim();
  return key ? key : undefined;
}
/**
 * Chooses the search provider.
 *
 * An explicit "perplexity" or "brave" request is honoured as-is. Any other
 * value triggers auto-detection: Brave when a Brave key is configured,
 * otherwise Perplexity when one of its keys is configured, otherwise Brave.
 */
function resolveProvider(requested?: string): SearchProvider {
  switch (requested) {
    case "perplexity":
    case "brave":
      return requested;
    default:
      break;
  }
  // Auto-detect based on available API keys; Brave wins when both are present.
  if (resolveBraveApiKey()) {
    return "brave";
  }
  return resolvePerplexityApiKey().apiKey ? "perplexity" : "brave";
}
/**
 * Maps the configured model id to the id expected by the target endpoint.
 *
 * Model ids in this module use the OpenRouter form ("perplexity/<model>").
 * The direct Perplexity API expects the bare model name, so the vendor prefix
 * is stripped when `baseUrl` points at the direct endpoint's host.
 */
function resolvePerplexityRequestModel(baseUrl: string, model: string): string {
  const vendorPrefix = "perplexity/";
  let isDirect = false;
  try {
    isDirect = new URL(baseUrl).hostname === new URL(PERPLEXITY_DIRECT_BASE_URL).hostname;
  } catch {
    // Unparseable base URL: leave the model id untouched.
    isDirect = false;
  }
  if (isDirect && model.startsWith(vendorPrefix)) {
    return model.slice(vendorPrefix.length);
  }
  return model;
}

/**
 * Sends the query as a single user message to an OpenAI-compatible
 * `/chat/completions` endpoint and returns the synthesized answer.
 *
 * @param params.query - Search query, sent as the user message.
 * @param params.apiKey - Bearer token for the endpoint.
 * @param params.baseUrl - API base URL; one trailing slash is tolerated.
 * @param params.model - Model id; adapted for the direct Perplexity endpoint.
 * @param params.timeoutSeconds - Request timeout.
 * @returns The first choice's content ("No response" when absent) and the
 *   citations list (empty when absent or not an array).
 * @throws Error when the endpoint answers with a non-OK status.
 */
async function runPerplexitySearch(params: {
  query: string;
  apiKey: string;
  baseUrl: string;
  model: string;
  timeoutSeconds: number;
}): Promise<{ content: string; citations: string[] }> {
  const endpoint = `${params.baseUrl.replace(/\/$/, "")}/chat/completions`;
  const res = await fetch(endpoint, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${params.apiKey}`,
      "HTTP-Referer": "https://multica.ai",
      "X-Title": "Multica Web Search",
    },
    body: JSON.stringify({
      model: resolvePerplexityRequestModel(params.baseUrl, params.model),
      messages: [
        {
          role: "user",
          content: params.query,
        },
      ],
    }),
    signal: withTimeout(undefined, params.timeoutSeconds * 1000),
  });
  if (!res.ok) {
    const detail = await readResponseText(res);
    throw new Error(`Perplexity API error (${res.status}): ${detail || res.statusText}`);
  }
  const data = (await res.json()) as PerplexitySearchResponse;
  const content = data.choices?.[0]?.message?.content ?? "No response";
  // The body is unvalidated JSON, so make sure callers really get an array.
  const citations = Array.isArray(data.citations) ? data.citations : [];
  return { content, citations };
}
/**
 * Queries the Brave Search API and maps its web results to a flat list.
 *
 * `country` and `freshness` are only sent when provided. Missing title, url
 * or description fields become empty strings; `published` (from Brave's `age`)
 * and `siteName` (the URL's hostname) are only set when available.
 *
 * @throws Error when the API answers with a non-OK status.
 */
async function runBraveSearch(params: {
  query: string;
  count: number;
  apiKey: string;
  timeoutSeconds: number;
  country: string | undefined;
  freshness: string | undefined;
}): Promise<{
  results: Array<{
    title: string;
    url: string;
    description: string;
    published?: string;
    siteName?: string;
  }>;
}> {
  type Hit = {
    title: string;
    url: string;
    description: string;
    published?: string;
    siteName?: string;
  };

  const requestUrl = new URL(BRAVE_SEARCH_ENDPOINT);
  const search = requestUrl.searchParams;
  search.set("q", params.query);
  search.set("count", String(params.count));
  if (params.country) {
    search.set("country", params.country);
  }
  if (params.freshness) {
    search.set("freshness", params.freshness);
  }

  const response = await fetch(requestUrl.toString(), {
    method: "GET",
    headers: {
      Accept: "application/json",
      "X-Subscription-Token": params.apiKey,
    },
    signal: withTimeout(undefined, params.timeoutSeconds * 1000),
  });
  if (!response.ok) {
    const detail = await readResponseText(response);
    throw new Error(`Brave Search API error (${response.status}): ${detail || response.statusText}`);
  }

  const data = (await response.json()) as BraveSearchResponse;
  const found = data.web?.results;
  const entries = Array.isArray(found) ? found : [];

  const results: Hit[] = [];
  for (const entry of entries) {
    const hit: Hit = {
      title: entry.title ?? "",
      url: entry.url ?? "",
      description: entry.description ?? "",
    };
    if (entry.age) {
      hit.published = entry.age;
    }
    const siteName = resolveSiteName(entry.url);
    if (siteName) {
      hit.siteName = siteName;
    }
    results.push(hit);
  }
  return { results };
}
/**
 * Runs a search with the given provider, using the module cache.
 *
 * A cached payload for the same provider/query/count/country/freshness is
 * returned with `cached: true`. Otherwise the provider's API key is resolved
 * (a missing key yields a `missing_api_key` error object rather than a throw),
 * the provider is queried, and the resulting payload is cached and returned.
 *
 * @returns Either a provider-specific result payload or an error object.
 */
async function runWebSearch(params: {
  query: string;
  provider: SearchProvider;
  count: number;
  timeoutSeconds: number;
  cacheTtlMs: number;
  country: string | undefined;
  freshness: string | undefined;
}): Promise<Record<string, unknown>> {
  const keyParts = [
    params.provider,
    params.query,
    params.count,
    params.country || "default",
    params.freshness || "default",
  ];
  const cacheKey = normalizeCacheKey(keyParts.join(":"));
  const hit = readCache(SEARCH_CACHE, cacheKey);
  if (hit) {
    return { ...hit.value, cached: true };
  }

  const startedAt = Date.now();

  if (params.provider !== "perplexity") {
    // Brave search
    const braveKey = resolveBraveApiKey();
    if (!braveKey) {
      return {
        error: "missing_api_key",
        message: "Brave search requires BRAVE_API_KEY environment variable.",
      };
    }
    const { results } = await runBraveSearch({
      query: params.query,
      count: params.count,
      apiKey: braveKey,
      timeoutSeconds: params.timeoutSeconds,
      country: params.country,
      freshness: params.freshness,
    });
    const bravePayload = {
      query: params.query,
      provider: params.provider,
      count: results.length,
      tookMs: Date.now() - startedAt,
      results,
    };
    writeCache(SEARCH_CACHE, cacheKey, bravePayload, params.cacheTtlMs);
    return bravePayload;
  }

  // Perplexity search
  const { apiKey: perplexityKey } = resolvePerplexityApiKey();
  if (!perplexityKey) {
    return {
      error: "missing_api_key",
      message:
        "Perplexity search requires PERPLEXITY_API_KEY or OPENROUTER_API_KEY environment variable.",
    };
  }
  const { content, citations } = await runPerplexitySearch({
    query: params.query,
    apiKey: perplexityKey,
    baseUrl: inferPerplexityBaseUrl(perplexityKey),
    model: DEFAULT_PERPLEXITY_MODEL,
    timeoutSeconds: params.timeoutSeconds,
  });
  const perplexityPayload = {
    query: params.query,
    provider: params.provider,
    model: DEFAULT_PERPLEXITY_MODEL,
    tookMs: Date.now() - startedAt,
    content,
    citations,
  };
  writeCache(SEARCH_CACHE, cacheKey, perplexityPayload, params.cacheTtlMs);
  return perplexityPayload;
}
/**
 * Builds the `web_search` agent tool.
 *
 * The handler resolves the provider, rejects `freshness` for non-Brave
 * providers and for malformed values, clamps `count`, runs the search with the
 * module's default timeout and cache TTL, and wraps the outcome with
 * `jsonResult`. Search failures are reported as `{ error: "search_failed" }`
 * rather than being thrown.
 */
export function createWebSearchTool(): AgentTool<typeof WebSearchSchema, unknown> {
  return {
    name: "web_search",
    label: "Web Search",
    description:
      'Search the web. Supports "brave" (traditional results with titles/URLs/snippets) and "perplexity" (AI-synthesized answers with citations). Provider auto-detected from available API keys if not specified.',
    parameters: WebSearchSchema,
    execute: async (_toolCallId, args) => {
      const params = args as WebSearchArgs;
      const raw = params as Record<string, unknown>;

      const query = readStringParam(raw, "query", { required: true });
      const provider = resolveProvider(readStringParam(raw, "provider"));
      const requestedCount = readNumberParam(raw, "count", { integer: true });
      const count = requestedCount ?? DEFAULT_SEARCH_COUNT;
      const country = readStringParam(raw, "country");
      const rawFreshness = readStringParam(raw, "freshness");

      let freshness: string | undefined;
      if (rawFreshness) {
        if (provider !== "brave") {
          return jsonResult({
            error: "unsupported_parameter",
            message: "freshness parameter is only supported by the Brave search provider.",
          });
        }
        freshness = normalizeFreshness(rawFreshness);
        if (!freshness) {
          return jsonResult({
            error: "invalid_freshness",
            message:
              "freshness must be one of: pd (past day), pw (past week), pm (past month), py (past year), or YYYY-MM-DDtoYYYY-MM-DD.",
          });
        }
      }

      try {
        const found = await runWebSearch({
          query,
          provider,
          count: resolveSearchCount(count, DEFAULT_SEARCH_COUNT),
          timeoutSeconds: resolveTimeoutSeconds(DEFAULT_TIMEOUT_SECONDS, DEFAULT_TIMEOUT_SECONDS),
          cacheTtlMs: resolveCacheTtlMs(DEFAULT_CACHE_TTL_MINUTES, DEFAULT_CACHE_TTL_MINUTES),
          country,
          freshness,
        });
        return jsonResult(found);
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        return jsonResult({
          error: "search_failed",
          message,
        });
      }
    },
  };
}