feat(agent): add web_fetch and web_search tools (#20)
* chore(deps): add web tools dependencies Add undici, @mozilla/readability, linkedom, and turndown for web fetching and HTML content extraction capabilities. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(agent): add web_fetch and web_search tools Port network tools from moltbot with the following features: web_fetch: - Fetch and extract readable content from URLs - Dual extraction mode: readability (smart) or turndown (full page) - SSRF protection (blocks private IPs, localhost) - Response caching with TTL - Redirect handling web_search: - Brave Search API for traditional results - Perplexity API for AI-synthesized answers - Region/language settings - Freshness filtering (Brave only) - Auto-detects provider from available API keys Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> * feat(agent): register web_fetch and web_search tools Add web tools to the agent's tool registry. Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com> --------- Co-authored-by: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
parent
028c3a2911
commit
9b3ffd1e90
10 changed files with 1568 additions and 1 deletions
|
|
@ -23,6 +23,7 @@
|
|||
"packageManager": "pnpm@10.16.1",
|
||||
"devDependencies": {
|
||||
"@types/node": "^25.0.10",
|
||||
"@types/turndown": "^5.0.6",
|
||||
"@types/uuid": "^11.0.0",
|
||||
"tsx": "^4.21.0",
|
||||
"turbo": "^2.3.4",
|
||||
|
|
@ -32,6 +33,7 @@
|
|||
"@mariozechner/pi-agent-core": "^0.50.3",
|
||||
"@mariozechner/pi-ai": "^0.50.3",
|
||||
"@mariozechner/pi-coding-agent": "^0.50.3",
|
||||
"@mozilla/readability": "^0.6.0",
|
||||
"@nestjs/common": "^11.1.12",
|
||||
"@nestjs/core": "^11.1.12",
|
||||
"@nestjs/platform-express": "^11.1.12",
|
||||
|
|
@ -40,6 +42,7 @@
|
|||
"@nestjs/websockets": "^11.1.12",
|
||||
"@sinclair/typebox": "^0.34.41",
|
||||
"fast-glob": "^3.3.3",
|
||||
"linkedom": "^0.18.12",
|
||||
"nestjs-pino": "^4.5.0",
|
||||
"pino": "^10.3.0",
|
||||
"pino-http": "^11.0.0",
|
||||
|
|
@ -48,6 +51,8 @@
|
|||
"rxjs": "^7.8.2",
|
||||
"socket.io": "^4.8.3",
|
||||
"socket.io-client": "^4.8.3",
|
||||
"turndown": "^7.2.2",
|
||||
"undici": "^7.19.2",
|
||||
"uuid": "^13.0.0"
|
||||
}
|
||||
}
|
||||
|
|
|
|||
152
pnpm-lock.yaml
generated
152
pnpm-lock.yaml
generated
|
|
@ -17,6 +17,9 @@ importers:
|
|||
'@mariozechner/pi-coding-agent':
|
||||
specifier: ^0.50.3
|
||||
version: 0.50.3(@modelcontextprotocol/sdk@1.25.3(hono@4.11.7)(zod@4.3.6))(ws@8.18.3)(zod@4.3.6)
|
||||
'@mozilla/readability':
|
||||
specifier: ^0.6.0
|
||||
version: 0.6.0
|
||||
'@nestjs/common':
|
||||
specifier: ^11.1.12
|
||||
version: 11.1.12(reflect-metadata@0.2.2)(rxjs@7.8.2)
|
||||
|
|
@ -41,6 +44,9 @@ importers:
|
|||
fast-glob:
|
||||
specifier: ^3.3.3
|
||||
version: 3.3.3
|
||||
linkedom:
|
||||
specifier: ^0.18.12
|
||||
version: 0.18.12
|
||||
nestjs-pino:
|
||||
specifier: ^4.5.0
|
||||
version: 4.5.0(@nestjs/common@11.1.12(reflect-metadata@0.2.2)(rxjs@7.8.2))(pino-http@11.0.0)(pino@10.3.0)(rxjs@7.8.2)
|
||||
|
|
@ -65,6 +71,12 @@ importers:
|
|||
socket.io-client:
|
||||
specifier: ^4.8.3
|
||||
version: 4.8.3
|
||||
turndown:
|
||||
specifier: ^7.2.2
|
||||
version: 7.2.2
|
||||
undici:
|
||||
specifier: ^7.19.2
|
||||
version: 7.19.2
|
||||
uuid:
|
||||
specifier: ^13.0.0
|
||||
version: 13.0.0
|
||||
|
|
@ -72,6 +84,9 @@ importers:
|
|||
'@types/node':
|
||||
specifier: ^25.0.10
|
||||
version: 25.0.10
|
||||
'@types/turndown':
|
||||
specifier: ^5.0.6
|
||||
version: 5.0.6
|
||||
'@types/uuid':
|
||||
specifier: ^11.0.0
|
||||
version: 11.0.0
|
||||
|
|
@ -1043,6 +1058,9 @@ packages:
|
|||
'@mistralai/mistralai@1.10.0':
|
||||
resolution: {integrity: sha512-tdIgWs4Le8vpvPiUEWne6tK0qbVc+jMenujnvTqOjogrJUsCSQhus0tHTU1avDDh5//Rq2dFgP9mWRAdIEoBqg==}
|
||||
|
||||
'@mixmark-io/domino@2.2.0':
|
||||
resolution: {integrity: sha512-Y28PR25bHXUg88kCV7nivXrP2Nj2RueZ3/l/jdx6J9f8J4nsEGcgX0Qe6lt7Pa+J79+kPiJU3LguR6O/6zrLOw==}
|
||||
|
||||
'@modelcontextprotocol/sdk@1.25.3':
|
||||
resolution: {integrity: sha512-vsAMBMERybvYgKbg/l4L1rhS7VXV1c0CtyJg72vwxONVX0l4ZfKVAnZEWTQixJGTzKnELjQ59e4NbdFDALRiAQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
|
@ -1053,6 +1071,10 @@ packages:
|
|||
'@cfworker/json-schema':
|
||||
optional: true
|
||||
|
||||
'@mozilla/readability@0.6.0':
|
||||
resolution: {integrity: sha512-juG5VWh4qAivzTAeMzvY9xs9HY5rAcr2E4I7tiSSCokRFi7XIZCAu92ZkSTsIj1OPceCifL3cpfteP3pDT9/QQ==}
|
||||
engines: {node: '>=14.0.0'}
|
||||
|
||||
'@mswjs/interceptors@0.40.0':
|
||||
resolution: {integrity: sha512-EFd6cVbHsgLa6wa4RljGj6Wk75qoHxUSyc5asLyyPSyuhIcdS2Q3Phw6ImS1q+CkALthJRShiYfKANcQMuMqsQ==}
|
||||
engines: {node: '>=18'}
|
||||
|
|
@ -1585,6 +1607,9 @@ packages:
|
|||
'@types/statuses@2.0.6':
|
||||
resolution: {integrity: sha512-xMAgYwceFhRA2zY+XbEA7mxYbA093wdiW8Vu6gZPGWy9cmOyU9XesH1tNcEWsKFd5Vzrqx5T3D38PWx1FIIXkA==}
|
||||
|
||||
'@types/turndown@5.0.6':
|
||||
resolution: {integrity: sha512-ru00MoyeeouE5BX4gRL+6m/BsDfbRayOskWqUvh7CLGW+UXxHQItqALa38kKnOiZPqJrtzJUgAC2+F0rL1S4Pg==}
|
||||
|
||||
'@types/uuid@11.0.0':
|
||||
resolution: {integrity: sha512-HVyk8nj2m+jcFRNazzqyVKiZezyhDKrGUA3jlEcg/nZ6Ms+qHwocba1Y/AaVaznJTAM9xpdFSh+ptbNrhOGvZA==}
|
||||
deprecated: This is a stub types definition. uuid provides its own type definitions, so you do not need this installed.
|
||||
|
|
@ -1903,6 +1928,9 @@ packages:
|
|||
resolution: {integrity: sha512-oP5VkATKlNwcgvxi0vM0p/D3n2C3EReYVX+DNYs5TjZFn/oQt2j+4sVJtSMr18pdRr8wjTcBl6LoV+FUwzPmNA==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
boolbase@1.0.0:
|
||||
resolution: {integrity: sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==}
|
||||
|
||||
bowser@2.13.1:
|
||||
resolution: {integrity: sha512-OHawaAbjwx6rqICCKgSG0SAnT05bzd7ppyKLVUITZpANBaaMFBAsaNkto3LoQ31tyFP5kNujE8Cdx85G9VzOkw==}
|
||||
|
||||
|
|
@ -2076,11 +2104,21 @@ packages:
|
|||
resolution: {integrity: sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==}
|
||||
engines: {node: '>= 8'}
|
||||
|
||||
css-select@5.2.2:
|
||||
resolution: {integrity: sha512-TizTzUddG/xYLA3NXodFM0fSbNizXjOKhqiQQwvhlspadZokn1KDy0NZFS0wuEubIYAV5/c1/lAr0TaaFXEXzw==}
|
||||
|
||||
css-what@6.2.2:
|
||||
resolution: {integrity: sha512-u/O3vwbptzhMs3L1fQE82ZSLHQQfto5gyZzwteVIEyeaY5Fc7R4dapF/BvRoSYFeqfBk4m0V1Vafq5Pjv25wvA==}
|
||||
engines: {node: '>= 6'}
|
||||
|
||||
cssesc@3.0.0:
|
||||
resolution: {integrity: sha512-/Tb/JcjK111nNScGob5MNtsntNM1aCNUDipB/TkwZFhyDrrE47SOx/18wF2bbjgc3ZzCSKW1T5nt5EbFoAz/Vg==}
|
||||
engines: {node: '>=4'}
|
||||
hasBin: true
|
||||
|
||||
cssom@0.5.0:
|
||||
resolution: {integrity: sha512-iKuQcq+NdHqlAcwUY0o/HL69XQrUaQdMjmStJ8JFmUaiiQErlhrmuigkg/CU4E2J0IyUKUrMAgl36TvN67MqTw==}
|
||||
|
||||
csstype@3.2.3:
|
||||
resolution: {integrity: sha512-z1HGKcYy2xA8AGQfwrn0PAy+PB7X/GSj3UVJW9qKyn43xWa+gl5nXmU4qqLMRzWVLFC8KusUX8T/0kCiOYpAIQ==}
|
||||
|
||||
|
|
@ -2182,6 +2220,19 @@ packages:
|
|||
resolution: {integrity: sha512-35mSku4ZXK0vfCuHEDAwt55dg2jNajHZ1odvF+8SSr82EsZY4QmXfuWso8oEd8zRhVObSN18aM0CjSdoBX7zIw==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
||||
dom-serializer@2.0.0:
|
||||
resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==}
|
||||
|
||||
domelementtype@2.3.0:
|
||||
resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==}
|
||||
|
||||
domhandler@5.0.3:
|
||||
resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==}
|
||||
engines: {node: '>= 4'}
|
||||
|
||||
domutils@3.2.2:
|
||||
resolution: {integrity: sha512-6kZKyUajlDuqlHKVX1w7gyslj9MPIXzIFiz/rGu35uC1wMi+kMhQwGhl4lt9unC9Vb9INnY9Z3/ZA3+FhASLaw==}
|
||||
|
||||
dotenv@17.2.3:
|
||||
resolution: {integrity: sha512-JVUnt+DUIzu87TABbhPmNfVdBDt18BLOWjMUFJMSi/Qqg7NTYtabbvSNJGOJ7afbRuv9D/lngizHtP7QyLQ+9w==}
|
||||
engines: {node: '>=12'}
|
||||
|
|
@ -2237,6 +2288,14 @@ packages:
|
|||
resolution: {integrity: sha512-LgQMM4WXU3QI+SYgEc2liRgznaD5ojbmY3sb8LxyguVkIg5FxdpTkvk72te2R38/TGKxH634oLxXRGY6d7AP+Q==}
|
||||
engines: {node: '>=10.13.0'}
|
||||
|
||||
entities@4.5.0:
|
||||
resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==}
|
||||
engines: {node: '>=0.12'}
|
||||
|
||||
entities@7.0.1:
|
||||
resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==}
|
||||
engines: {node: '>=0.12'}
|
||||
|
||||
env-paths@2.2.1:
|
||||
resolution: {integrity: sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==}
|
||||
engines: {node: '>=6'}
|
||||
|
|
@ -2726,6 +2785,12 @@ packages:
|
|||
resolution: {integrity: sha512-l7qMiNee7t82bH3SeyUCt9UF15EVmaBvsppY2zQtrbIhl/yzBTny+YUxsVjSjQ6gaqaeVtZmGocom8TzBlA4Yw==}
|
||||
engines: {node: '>=16.9.0'}
|
||||
|
||||
html-escaper@3.0.3:
|
||||
resolution: {integrity: sha512-RuMffC89BOWQoY0WKGpIhn5gX3iI54O6nRA0yC124NYVtzjmFWBIiFd8M0x+ZdX0P9R4lADg1mgP8C7PxGOWuQ==}
|
||||
|
||||
htmlparser2@10.1.0:
|
||||
resolution: {integrity: sha512-VTZkM9GWRAtEpveh7MSF6SjjrpNVNNVJfFup7xTY3UpFtm67foy9HDVXneLtFVt4pMz5kZtgNcvCniNFb1hlEQ==}
|
||||
|
||||
http-errors@2.0.1:
|
||||
resolution: {integrity: sha512-4FbRdAX+bSdmo4AUFuS0WNiPz8NgFt+r8ThgNWmlrjQjt1Q7ZR9+zTlce2859x4KSXrwIsaeTqDoKQmtP8pLmQ==}
|
||||
engines: {node: '>= 0.8'}
|
||||
|
|
@ -3140,6 +3205,15 @@ packages:
|
|||
lines-and-columns@1.2.4:
|
||||
resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==}
|
||||
|
||||
linkedom@0.18.12:
|
||||
resolution: {integrity: sha512-jalJsOwIKuQJSeTvsgzPe9iJzyfVaEJiEXl+25EkKevsULHvMJzpNqwvj1jOESWdmgKDiXObyjOYwlUqG7wo1Q==}
|
||||
engines: {node: '>=16'}
|
||||
peerDependencies:
|
||||
canvas: '>= 2'
|
||||
peerDependenciesMeta:
|
||||
canvas:
|
||||
optional: true
|
||||
|
||||
load-esm@1.0.3:
|
||||
resolution: {integrity: sha512-v5xlu8eHD1+6r8EHTg6hfmO97LN8ugKtiXcy5e6oN72iD2r6u0RPfLl6fxM+7Wnh2ZRq15o0russMst44WauPA==}
|
||||
engines: {node: '>=13.2.0'}
|
||||
|
|
@ -3353,6 +3427,9 @@ packages:
|
|||
resolution: {integrity: sha512-9qny7Z9DsQU8Ou39ERsPU4OZQlSTP47ShQzuKZ6PRXpYLtIFgl/DEBYEXKlvcEa+9tHVcK8CF81Y2V72qaZhWA==}
|
||||
engines: {node: '>=18'}
|
||||
|
||||
nth-check@2.1.1:
|
||||
resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==}
|
||||
|
||||
object-assign@4.1.1:
|
||||
resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==}
|
||||
engines: {node: '>=0.10.0'}
|
||||
|
|
@ -4119,6 +4196,9 @@ packages:
|
|||
resolution: {integrity: sha512-hYbxnLEdvJF+DLALS+Ia+PbfNtn0sDP0hH2u9AFoskSUDmcVHSrtwHpzdX94MrRJKo9D9tYxY3MyP20gnlrWyA==}
|
||||
hasBin: true
|
||||
|
||||
turndown@7.2.2:
|
||||
resolution: {integrity: sha512-1F7db8BiExOKxjSMU2b7if62D/XOyQyZbPKq/nUwopfgnHlqXHqQ0lvfUTeUIr1lZJzOPFn43dODyMSIfvWRKQ==}
|
||||
|
||||
tw-animate-css@1.4.0:
|
||||
resolution: {integrity: sha512-7bziOlRqH0hJx80h/3mbicLW7o8qLsH5+RaLR2t+OHM3D0JlWGODQKQ4cxbK7WlvmUxpcj6Kgu6EKqjrGFe3QQ==}
|
||||
|
||||
|
|
@ -4169,6 +4249,9 @@ packages:
|
|||
engines: {node: '>=14.17'}
|
||||
hasBin: true
|
||||
|
||||
uhyphen@0.2.0:
|
||||
resolution: {integrity: sha512-qz3o9CHXmJJPGBdqzab7qAYuW8kQGKNEuoHFYrBwV6hWIMcpAmxDLXojcHfFr9US1Pe6zUswEIJIbLI610fuqA==}
|
||||
|
||||
uid@2.0.2:
|
||||
resolution: {integrity: sha512-u3xV3X7uzvi5b1MncmZo3i2Aw222Zk1keqLA1YkHldREkAhAqi65wuPfe7lHx8H/Wzy+8CE7S7uS3jekIM5s8g==}
|
||||
engines: {node: '>=8'}
|
||||
|
|
@ -5561,6 +5644,8 @@ snapshots:
|
|||
zod: 3.25.76
|
||||
zod-to-json-schema: 3.25.1(zod@3.25.76)
|
||||
|
||||
'@mixmark-io/domino@2.2.0': {}
|
||||
|
||||
'@modelcontextprotocol/sdk@1.25.3(hono@4.11.7)(zod@3.25.76)':
|
||||
dependencies:
|
||||
'@hono/node-server': 1.19.9(hono@4.11.7)
|
||||
|
|
@ -5606,6 +5691,8 @@ snapshots:
|
|||
- supports-color
|
||||
optional: true
|
||||
|
||||
'@mozilla/readability@0.6.0': {}
|
||||
|
||||
'@mswjs/interceptors@0.40.0':
|
||||
dependencies:
|
||||
'@open-draft/deferred-promise': 2.2.0
|
||||
|
|
@ -6204,6 +6291,8 @@ snapshots:
|
|||
|
||||
'@types/statuses@2.0.6': {}
|
||||
|
||||
'@types/turndown@5.0.6': {}
|
||||
|
||||
'@types/uuid@11.0.0':
|
||||
dependencies:
|
||||
uuid: 13.0.0
|
||||
|
|
@ -6531,6 +6620,8 @@ snapshots:
|
|||
transitivePeerDependencies:
|
||||
- supports-color
|
||||
|
||||
boolbase@1.0.0: {}
|
||||
|
||||
bowser@2.13.1: {}
|
||||
|
||||
brace-expansion@1.1.12:
|
||||
|
|
@ -6695,8 +6786,20 @@ snapshots:
|
|||
shebang-command: 2.0.0
|
||||
which: 2.0.2
|
||||
|
||||
css-select@5.2.2:
|
||||
dependencies:
|
||||
boolbase: 1.0.0
|
||||
css-what: 6.2.2
|
||||
domhandler: 5.0.3
|
||||
domutils: 3.2.2
|
||||
nth-check: 2.1.1
|
||||
|
||||
css-what@6.2.2: {}
|
||||
|
||||
cssesc@3.0.0: {}
|
||||
|
||||
cssom@0.5.0: {}
|
||||
|
||||
csstype@3.2.3: {}
|
||||
|
||||
damerau-levenshtein@1.0.8: {}
|
||||
|
|
@ -6776,6 +6879,24 @@ snapshots:
|
|||
dependencies:
|
||||
esutils: 2.0.3
|
||||
|
||||
dom-serializer@2.0.0:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
entities: 4.5.0
|
||||
|
||||
domelementtype@2.3.0: {}
|
||||
|
||||
domhandler@5.0.3:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
|
||||
domutils@3.2.2:
|
||||
dependencies:
|
||||
dom-serializer: 2.0.0
|
||||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
|
||||
dotenv@17.2.3: {}
|
||||
|
||||
dunder-proto@1.0.1:
|
||||
|
|
@ -6848,6 +6969,10 @@ snapshots:
|
|||
graceful-fs: 4.2.11
|
||||
tapable: 2.3.0
|
||||
|
||||
entities@4.5.0: {}
|
||||
|
||||
entities@7.0.1: {}
|
||||
|
||||
env-paths@2.2.1: {}
|
||||
|
||||
error-ex@1.3.4:
|
||||
|
|
@ -7577,6 +7702,15 @@ snapshots:
|
|||
|
||||
hono@4.11.7: {}
|
||||
|
||||
html-escaper@3.0.3: {}
|
||||
|
||||
htmlparser2@10.1.0:
|
||||
dependencies:
|
||||
domelementtype: 2.3.0
|
||||
domhandler: 5.0.3
|
||||
domutils: 3.2.2
|
||||
entities: 7.0.1
|
||||
|
||||
http-errors@2.0.1:
|
||||
dependencies:
|
||||
depd: 2.0.0
|
||||
|
|
@ -7942,6 +8076,14 @@ snapshots:
|
|||
|
||||
lines-and-columns@1.2.4: {}
|
||||
|
||||
linkedom@0.18.12:
|
||||
dependencies:
|
||||
css-select: 5.2.2
|
||||
cssom: 0.5.0
|
||||
html-escaper: 3.0.3
|
||||
htmlparser2: 10.1.0
|
||||
uhyphen: 0.2.0
|
||||
|
||||
load-esm@1.0.3: {}
|
||||
|
||||
locate-path@6.0.0:
|
||||
|
|
@ -8135,6 +8277,10 @@ snapshots:
|
|||
path-key: 4.0.0
|
||||
unicorn-magic: 0.3.0
|
||||
|
||||
nth-check@2.1.1:
|
||||
dependencies:
|
||||
boolbase: 1.0.0
|
||||
|
||||
object-assign@4.1.1: {}
|
||||
|
||||
object-hash@3.0.0: {}
|
||||
|
|
@ -9075,6 +9221,10 @@ snapshots:
|
|||
turbo-windows-64: 2.8.0
|
||||
turbo-windows-arm64: 2.8.0
|
||||
|
||||
turndown@7.2.2:
|
||||
dependencies:
|
||||
'@mixmark-io/domino': 2.2.0
|
||||
|
||||
tw-animate-css@1.4.0: {}
|
||||
|
||||
type-check@0.4.0:
|
||||
|
|
@ -9144,6 +9294,8 @@ snapshots:
|
|||
|
||||
typescript@5.9.3: {}
|
||||
|
||||
uhyphen@0.2.0: {}
|
||||
|
||||
uid@2.0.2:
|
||||
dependencies:
|
||||
'@lukeed/csprng': 1.1.0
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ import type { AgentTool } from "@mariozechner/pi-agent-core";
|
|||
import { createExecTool } from "./tools/exec.js";
|
||||
import { createProcessTool } from "./tools/process.js";
|
||||
import { createGlobTool } from "./tools/glob.js";
|
||||
import { createWebFetchTool, createWebSearchTool } from "./tools/web/index.js";
|
||||
|
||||
export function resolveModel(options: AgentOptions) {
|
||||
if (options.provider && options.model) {
|
||||
|
|
@ -23,5 +24,14 @@ export function resolveTools(options: AgentOptions): AgentTool<any>[] {
|
|||
const execTool = createExecTool(cwd);
|
||||
const processTool = createProcessTool(cwd);
|
||||
const globTool = createGlobTool(cwd);
|
||||
return [...baseTools, execTool as AgentTool<any>, processTool as AgentTool<any>, globTool as AgentTool<any>];
|
||||
const webFetchTool = createWebFetchTool();
|
||||
const webSearchTool = createWebSearchTool();
|
||||
return [
|
||||
...baseTools,
|
||||
execTool as AgentTool<any>,
|
||||
processTool as AgentTool<any>,
|
||||
globTool as AgentTool<any>,
|
||||
webFetchTool as AgentTool<any>,
|
||||
webSearchTool as AgentTool<any>,
|
||||
];
|
||||
}
|
||||
|
|
|
|||
87
src/agent/tools/web/cache.ts
Normal file
87
src/agent/tools/web/cache.ts
Normal file
|
|
@ -0,0 +1,87 @@
|
|||
/** One cached value plus its bookkeeping timestamps (epoch milliseconds). */
export type CacheEntry<T> = {
  /** The cached payload. */
  value: T;
  /** Instant after which `readCache` treats the entry as stale. */
  expiresAt: number;
  /** Instant at which the entry was written. */
  insertedAt: number;
};

/** Default timeout, in seconds. */
export const DEFAULT_TIMEOUT_SECONDS = 30;

/** Default cache time-to-live, in minutes. */
export const DEFAULT_CACHE_TTL_MINUTES = 15;

/** Entry count at which `writeCache` starts evicting. */
const DEFAULT_CACHE_MAX_ENTRIES = 100;
|
||||
|
||||
/**
 * Turn a loosely-typed timeout setting into a whole number of seconds.
 *
 * @param value - Candidate timeout; used only when it is a finite number.
 * @param fallback - Seconds to use when `value` is not a finite number.
 * @returns The chosen value rounded down, never less than 1.
 */
export function resolveTimeoutSeconds(value: unknown, fallback: number): number {
  let seconds = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    seconds = value;
  }
  const whole = Math.floor(seconds);
  return whole < 1 ? 1 : whole;
}
|
||||
|
||||
/**
 * Convert a cache TTL expressed in minutes into milliseconds.
 *
 * @param value - Candidate TTL in minutes; negative numbers are clamped to 0.
 * @param fallbackMinutes - Minutes to use when `value` is not a finite number.
 * @returns The TTL in milliseconds, rounded to the nearest integer.
 */
export function resolveCacheTtlMs(value: unknown, fallbackMinutes: number): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return Math.round(fallbackMinutes * 60_000);
  }
  const clampedMinutes = Math.max(0, value);
  return Math.round(clampedMinutes * 60_000);
}
|
||||
|
||||
/**
 * Build a cache key from a raw string by trimming surrounding whitespace and
 * lower-casing the whole value.
 */
export function normalizeCacheKey(value: string): string {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
||||
|
||||
/**
 * Look up `key` in `cache`.
 *
 * An entry whose `expiresAt` lies in the past is removed from the map and
 * reported as a miss.
 *
 * @returns `{ value, cached: true }` on a live hit, otherwise `null`.
 */
export function readCache<T>(
  cache: Map<string, CacheEntry<T>>,
  key: string,
): { value: T; cached: boolean } | null {
  const hit = cache.get(key);
  if (!hit) return null;

  const expired = Date.now() > hit.expiresAt;
  if (!expired) {
    return { value: hit.value, cached: true };
  }

  // Drop the stale entry so it stops occupying a slot.
  cache.delete(key);
  return null;
}
|
||||
|
||||
/**
 * Store `value` under `key` for `ttlMs` milliseconds.
 *
 * The Map's insertion order doubles as the eviction order: when the cache is
 * full, the entry at the front of the map is removed.
 *
 * @param cache - Map that holds the entries; mutated in place.
 * @param key - Cache key.
 * @param value - Payload to store.
 * @param ttlMs - Lifetime in milliseconds; nothing is stored unless it is > 0.
 * @param maxEntries - Capacity of the cache (defaults to the module-wide limit).
 */
export function writeCache<T>(
  cache: Map<string, CacheEntry<T>>,
  key: string,
  value: T,
  ttlMs: number,
  maxEntries: number = DEFAULT_CACHE_MAX_ENTRIES,
): void {
  // `!(x > 0)` also rejects NaN, which would otherwise create an entry that never expires.
  if (!(ttlMs > 0)) return;

  // Remove any previous entry for this key first: a rewrite must not count
  // toward capacity (and evict an unrelated entry), and re-inserting moves the
  // refreshed value to the back of the eviction order.
  cache.delete(key);

  const limit = Number.isFinite(maxEntries)
    ? Math.max(1, Math.floor(maxEntries))
    : DEFAULT_CACHE_MAX_ENTRIES;
  while (cache.size >= limit) {
    const oldest = cache.keys().next();
    if (oldest.done) break;
    cache.delete(oldest.value);
  }

  // Read the clock once so both timestamps are consistent with each other.
  const now = Date.now();
  cache.set(key, { value, expiresAt: now + ttlMs, insertedAt: now });
}
|
||||
|
||||
/**
 * Derive an AbortSignal that aborts when either the caller's `signal` aborts
 * or `timeoutMs` milliseconds elapse, whichever happens first.
 *
 * @param signal - Optional caller-supplied signal to follow.
 * @param timeoutMs - Timeout in milliseconds. When it is <= 0 no timer is
 *   created and the caller's signal (or a fresh signal that nothing aborts) is
 *   returned unchanged.
 * @returns The derived signal.
 */
export function withTimeout(signal: AbortSignal | undefined, timeoutMs: number): AbortSignal {
  if (timeoutMs <= 0) return signal ?? new AbortController().signal;

  const controller = new AbortController();

  // An already-aborted signal never dispatches "abort" again, so a listener
  // would not fire; mirror the aborted state immediately instead.
  if (signal?.aborted) {
    controller.abort();
    return controller.signal;
  }

  const timer = setTimeout(() => controller.abort(), timeoutMs);
  const onCallerAbort = (): void => controller.abort();
  signal?.addEventListener("abort", onCallerAbort, { once: true });

  controller.signal.addEventListener(
    "abort",
    () => {
      // Whatever caused the abort, release the timer and the listener that was
      // attached to the caller's signal.
      clearTimeout(timer);
      signal?.removeEventListener("abort", onCallerAbort);
    },
    { once: true },
  );

  return controller.signal;
}
|
||||
|
||||
/**
 * Read a response body as text on a best-effort basis.
 *
 * @returns The body text, or an empty string when reading it fails.
 */
export async function readResponseText(res: Response): Promise<string> {
  let body = "";
  try {
    body = await res.text();
  } catch {
    // Deliberately ignored: callers get "" when the body cannot be read.
  }
  return body;
}
|
||||
208
src/agent/tools/web/html-utils.ts
Normal file
208
src/agent/tools/web/html-utils.ts
Normal file
|
|
@ -0,0 +1,208 @@
|
|||
import TurndownService from "turndown";
|
||||
|
||||
/** Output format requested from an extractor. */
export type ExtractMode = "markdown" | "text";

/** Identifies which extraction strategy produced a result. */
export type ExtractorType = "readability" | "turndown";

/** Extracted page content. */
export type ExtractResult = {
  /** Extracted body, as markdown or plain text. */
  text: string;
  /** Page title, present only when one was found. */
  title?: string;
};

/** An {@link ExtractResult} tagged with the extractor that produced it. */
export type ExtractResultWithExtractor = ExtractResult & {
  extractor: ExtractorType;
};
|
||||
|
||||
/** Named HTML entities that `decodeEntities` understands (keys are lower-case). */
const NAMED_HTML_ENTITIES: Record<string, string> = {
  nbsp: " ",
  amp: "&",
  quot: '"',
  apos: "'",
  lt: "<",
  gt: ">",
};

/**
 * Decode the common named HTML entities and all numeric character references
 * (decimal `&#39;` and hexadecimal `&#x27;`) in `value`.
 *
 * Decoding happens in a single pass so that text which is itself escaped
 * (for example `&amp;lt;`) is decoded exactly once. References to code points
 * outside the Unicode range are left untouched.
 *
 * @param value - Text that may contain HTML entities.
 * @returns The text with recognised entities replaced by their characters.
 */
function decodeEntities(value: string): string {
  return value.replace(
    /&(?:#x([0-9a-f]+)|#(\d+)|(nbsp|amp|quot|apos|lt|gt));/gi,
    (entity: string, hex?: string, dec?: string, name?: string): string => {
      if (name !== undefined) {
        return NAMED_HTML_ENTITIES[name.toLowerCase()] ?? entity;
      }
      const codePoint =
        hex !== undefined ? Number.parseInt(hex, 16) : Number.parseInt(dec ?? "", 10);
      // String.fromCodePoint throws on out-of-range input, and unlike
      // String.fromCharCode it handles code points above 0xFFFF correctly.
      const valid = Number.isInteger(codePoint) && codePoint >= 0 && codePoint <= 0x10ffff;
      return valid ? String.fromCodePoint(codePoint) : entity;
    },
  );
}
|
||||
|
||||
/** Remove every `<...>` tag from `value`, then decode HTML entities in what remains. */
function stripTags(value: string): string {
  const withoutTags = value.replace(/<[^>]+>/g, "");
  return decodeEntities(withoutTags);
}
|
||||
|
||||
/**
 * Tidy whitespace: drop carriage returns, strip blanks before newlines, cap
 * consecutive newlines at two, collapse runs of spaces/tabs to one space, and
 * trim both ends.
 */
function normalizeWhitespace(value: string): string {
  const withoutCarriageReturns = value.replace(/\r/g, "");
  const withoutTrailingBlanks = withoutCarriageReturns.replace(/[ \t]+\n/g, "\n");
  const withCappedBlankLines = withoutTrailingBlanks.replace(/\n{3,}/g, "\n\n");
  const withSingleSpaces = withCappedBlankLines.replace(/[ \t]{2,}/g, " ");
  return withSingleSpaces.trim();
}
|
||||
|
||||
/**
 * Pull the text of the first `<title>` element out of raw HTML.
 *
 * @returns The cleaned-up title, or `undefined` when there is no title element
 *   or it is empty after cleaning.
 */
function extractTitle(html: string): string | undefined {
  const rawTitle = /<title[^>]*>([\s\S]*?)<\/title>/i.exec(html)?.[1];
  if (!rawTitle) return undefined;
  const cleaned = normalizeWhitespace(stripTags(rawTitle));
  return cleaned === "" ? undefined : cleaned;
}
|
||||
|
||||
/** Assemble an ExtractResult, omitting the `title` key entirely when the title is empty or undefined. */
function buildResult(text: string, title: string | undefined): ExtractResult {
  return title ? { text, title } : { text };
}
|
||||
|
||||
/**
 * Assemble an extraction result tagged with the extractor that produced it.
 * The `title` key is omitted entirely when the title is empty or undefined.
 */
function buildResultWithExtractor(
  text: string,
  title: string | undefined,
  extractor: ExtractorType,
): ExtractResultWithExtractor {
  return title ? { text, title, extractor } : { text, extractor };
}
|
||||
|
||||
/**
 * Regex-based HTML to markdown conversion.
 *
 * Steps, in order: drop script/style/noscript elements, turn anchors into
 * `[label](href)`, headings into `#` lines and list items into `- ` lines,
 * replace `<br>`/`<hr>` and a set of block-level closing tags with newlines,
 * strip any remaining tags, and normalise whitespace.
 *
 * @returns The converted text plus the page title when one is found.
 */
export function htmlToMarkdownSimple(html: string): ExtractResult {
  const title = extractTitle(html);
  const plainLabel = (fragment: string): string => normalizeWhitespace(stripTags(fragment));

  const withoutNonContent = html
    .replace(/<script[\s\S]*?<\/script>/gi, "")
    .replace(/<style[\s\S]*?<\/style>/gi, "")
    .replace(/<noscript[\s\S]*?<\/noscript>/gi, "");

  const withLinks = withoutNonContent.replace(
    /<a\s+[^>]*href=["']([^"']+)["'][^>]*>([\s\S]*?)<\/a>/gi,
    (_match, href, body) => {
      const label = plainLabel(body);
      // A link without visible text degrades to its bare URL.
      return label ? `[${label}](${href})` : href;
    },
  );

  const withHeadings = withLinks.replace(
    /<h([1-6])[^>]*>([\s\S]*?)<\/h\1>/gi,
    (_match, level, body) => {
      const depth = Math.max(1, Math.min(6, Number.parseInt(level, 10)));
      return `\n${"#".repeat(depth)} ${plainLabel(body)}\n`;
    },
  );

  const withListItems = withHeadings.replace(/<li[^>]*>([\s\S]*?)<\/li>/gi, (_match, body) => {
    const label = plainLabel(body);
    return label ? `\n- ${label}` : "";
  });

  const withLineBreaks = withListItems
    .replace(/<(br|hr)\s*\/?>/gi, "\n")
    .replace(/<\/(p|div|section|article|header|footer|table|tr|ul|ol)>/gi, "\n");

  return buildResult(normalizeWhitespace(stripTags(withLineBreaks)), title);
}
|
||||
|
||||
/**
 * Reduce markdown to plain text: images are dropped, links keep only their
 * label, code fences and inline backticks are unwrapped, and heading, bullet
 * and numbered-list markers at the start of a line are removed.
 */
export function markdownToText(markdown: string): string {
  // Remove the opening fence (with its info string) and the closing fence, keeping the code.
  const unfence = (block: string): string =>
    block.replace(/```[^\n]*\n?/g, "").replace(/```/g, "");

  const plain = markdown
    .replace(/!\[[^\]]*]\([^)]+\)/g, "")
    .replace(/\[([^\]]+)]\([^)]+\)/g, "$1")
    .replace(/```[\s\S]*?```/g, (block) => unfence(block))
    .replace(/`([^`]+)`/g, "$1")
    .replace(/^#{1,6}\s+/gm, "")
    .replace(/^\s*[-*+]\s+/gm, "")
    .replace(/^\s*\d+\.\s+/gm, "");

  return normalizeWhitespace(plain);
}
|
||||
|
||||
/**
 * Cut `value` down to at most `maxChars` UTF-16 code units.
 *
 * @param value - Text to truncate.
 * @param maxChars - Maximum length; negative or NaN values are treated as 0
 *   and fractional values are rounded down.
 * @returns The (possibly shortened) text and whether anything was removed.
 */
export function truncateText(
  value: string,
  maxChars: number,
): { text: string; truncated: boolean } {
  // A negative limit would make slice() count from the end of the string.
  const limit = Number.isNaN(maxChars) ? 0 : Math.max(0, Math.floor(maxChars));
  if (value.length <= limit) return { text: value, truncated: false };

  let end = limit;
  if (end > 0) {
    const last = value.charCodeAt(end - 1);
    const next = value.charCodeAt(end);
    const splitsPair = last >= 0xd800 && last <= 0xdbff && next >= 0xdc00 && next <= 0xdfff;
    // Back off by one unit rather than leave half of a surrogate pair at the end.
    if (splitsPair) end -= 1;
  }
  return { text: value.slice(0, end), truncated: true };
}
|
||||
|
||||
/**
 * Convert a whole HTML document to markdown with TurndownService.
 *
 * script, style, meta, link and noscript elements are removed before
 * conversion; the title is taken from the document's `<title>` element.
 */
export function convertWithTurndown(html: string): ExtractResult {
  const title = extractTitle(html);

  const service = new TurndownService({
    headingStyle: "atx",
    hr: "---",
    bulletListMarker: "-",
    codeBlockStyle: "fenced",
    emDelimiter: "*",
  });
  service.remove(["script", "style", "meta", "link", "noscript"]);

  const markdown = service.turndown(html);
  return buildResult(normalizeWhitespace(markdown), title);
}
|
||||
|
||||
/**
 * Extract the main content of a page with Mozilla Readability (parsed through
 * linkedom).
 *
 * Whenever Readability yields no content, yields empty text in "text" mode, or
 * anything in the pipeline throws, the result comes from the regex-based
 * `htmlToMarkdownSimple` converter instead.
 *
 * @param params.html - Raw HTML of the page.
 * @param params.url - URL of the page, applied as the document's base URI on a best-effort basis.
 * @param params.extractMode - "markdown" or "text".
 */
export async function extractWithReadability(params: {
  html: string;
  url: string;
  extractMode: ExtractMode;
}): Promise<ExtractResult | null> {
  const wantsText = params.extractMode === "text";

  const regexFallback = (): ExtractResult => {
    const simple = htmlToMarkdownSimple(params.html);
    if (!wantsText) return simple;
    const plain = markdownToText(simple.text) || normalizeWhitespace(stripTags(params.html));
    return buildResult(plain, simple.title);
  };

  try {
    // Loaded lazily so the parsers are only imported when this extractor runs.
    const [readabilityModule, linkedomModule] = await Promise.all([
      import("@mozilla/readability"),
      import("linkedom"),
    ]);
    const { document } = linkedomModule.parseHTML(params.html);
    try {
      (document as { baseURI?: string }).baseURI = params.url;
    } catch {
      // Best-effort base URI for relative links.
    }

    const article = new readabilityModule.Readability(document, { charThreshold: 0 }).parse();
    if (!article?.content) return regexFallback();

    const title = article.title || undefined;
    if (wantsText) {
      const plain = normalizeWhitespace(article.textContent ?? "");
      return plain ? buildResult(plain, title) : regexFallback();
    }

    const markdown = htmlToMarkdownSimple(article.content);
    return buildResult(markdown.text, title ?? markdown.title);
  } catch {
    return regexFallback();
  }
}
|
||||
|
||||
/**
 * Extract content from HTML with the requested extractor.
 *
 * "turndown" converts the whole page. Any other value goes through
 * Readability, with turndown used if Readability returns nothing.
 *
 * @returns The extracted text/title tagged with the extractor that produced it.
 */
export async function extractContent(params: {
  html: string;
  url: string;
  extractMode: ExtractMode;
  extractor: ExtractorType;
}): Promise<ExtractResultWithExtractor> {
  const viaTurndown = (): ExtractResultWithExtractor => {
    const converted = convertWithTurndown(params.html);
    const body = params.extractMode === "text" ? markdownToText(converted.text) : converted.text;
    return buildResultWithExtractor(body, converted.title, "turndown");
  };

  if (params.extractor === "turndown") return viaTurndown();

  // Default: readability
  const readable = await extractWithReadability({
    html: params.html,
    url: params.url,
    extractMode: params.extractMode,
  });

  return readable
    ? buildResultWithExtractor(readable.text, readable.title, "readability")
    : viaTurndown();
}
|
||||
2
src/agent/tools/web/index.ts
Normal file
2
src/agent/tools/web/index.ts
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
export { createWebFetchTool, type WebFetchResult } from "./web-fetch.js";
|
||||
export { createWebSearchTool, type WebSearchResult } from "./web-search.js";
|
||||
73
src/agent/tools/web/param-helpers.ts
Normal file
73
src/agent/tools/web/param-helpers.ts
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
import type { AgentToolResult } from "@mariozechner/pi-agent-core";
|
||||
|
||||
/** Options accepted by `readStringParam`. */
export type StringParamOptions = {
  /** Throw instead of returning `undefined` when the value is missing. */
  required?: boolean;
  /** Trim surrounding whitespace before validation (default `true`). */
  trim?: boolean;
  /** Name used in the error message (defaults to the parameter key). */
  label?: string;
  /** Treat an empty string as a present value (default `false`). */
  allowEmpty?: boolean;
};
|
||||
|
||||
/**
 * Read a string parameter from a loosely-typed params object.
 *
 * A value counts as missing when it is not a string, or when it is empty
 * (after optional trimming) and `allowEmpty` is off.
 *
 * @param params - Raw parameter bag.
 * @param key - Property to read.
 * @param options - See {@link StringParamOptions}.
 * @returns The string, or `undefined` when it is missing and not required.
 * @throws Error with message `"<label> required"` when it is missing and required.
 */
export function readStringParam(
  params: Record<string, unknown>,
  key: string,
  options: StringParamOptions & { required: true },
): string;
export function readStringParam(
  params: Record<string, unknown>,
  key: string,
  options?: StringParamOptions,
): string | undefined;
export function readStringParam(
  params: Record<string, unknown>,
  key: string,
  options: StringParamOptions = {},
) {
  const {
    required: isRequired = false,
    trim: shouldTrim = true,
    label: displayName = key,
    allowEmpty: emptyOk = false,
  } = options;

  const missing = (): undefined => {
    if (isRequired) throw new Error(`${displayName} required`);
    return undefined;
  };

  const raw = params[key];
  if (typeof raw !== "string") return missing();

  const text = shouldTrim ? raw.trim() : raw;
  return text || emptyOk ? text : missing();
}
|
||||
|
||||
/**
 * Read a numeric parameter from a loosely-typed params object.
 *
 * Finite numbers are accepted as-is; non-blank strings are parsed with
 * `Number.parseFloat`. Anything else counts as missing.
 *
 * @param options.required - Throw instead of returning `undefined` when missing.
 * @param options.label - Name used in the error message (defaults to `key`).
 * @param options.integer - Truncate the result toward zero.
 * @throws Error with message `"<label> required"` when missing and required.
 */
export function readNumberParam(
  params: Record<string, unknown>,
  key: string,
  options: { required?: boolean; label?: string; integer?: boolean } = {},
): number | undefined {
  const {
    required: isRequired = false,
    label: displayName = key,
    integer: wantInteger = false,
  } = options;

  const raw = params[key];
  let candidate = Number.NaN;
  if (typeof raw === "number") {
    candidate = raw;
  } else if (typeof raw === "string") {
    const trimmed = raw.trim();
    if (trimmed) candidate = Number.parseFloat(trimmed);
  }

  if (!Number.isFinite(candidate)) {
    if (isRequired) throw new Error(`${displayName} required`);
    return undefined;
  }
  return wantInteger ? Math.trunc(candidate) : candidate;
}
|
||||
|
||||
/**
 * Wrap a payload as a tool result: the pretty-printed JSON (2-space indent)
 * becomes the single text content item, and the payload itself is returned
 * unchanged under `details`.
 */
export function jsonResult(payload: unknown): AgentToolResult<unknown> {
  const text = JSON.stringify(payload, null, 2);
  return {
    content: [{ type: "text", text }],
    details: payload,
  };
}
|
||||
244
src/agent/tools/web/ssrf.ts
Normal file
244
src/agent/tools/web/ssrf.ts
Normal file
|
|
@ -0,0 +1,244 @@
|
|||
import { lookup as dnsLookup } from "node:dns/promises";
|
||||
import { lookup as dnsLookupCb, type LookupAddress } from "node:dns";
|
||||
import { Agent, type Dispatcher } from "undici";
|
||||
|
||||
/**
 * Callback shape for node:dns `lookup`: an error (or null), then either one
 * address string or a list of address records, plus an optional address family.
 */
type LookupCallback = (
  err: NodeJS.ErrnoException | null,
  address: string | LookupAddress[],
  family?: number,
) => void;
|
||||
|
||||
/**
 * Error type for requests rejected by the SSRF checks. Its `name` is set to
 * "SsrfBlockedError" so it can be told apart from other errors.
 */
export class SsrfBlockedError extends Error {
  constructor(message: string) {
    super(message);
    this.name = "SsrfBlockedError";
  }
}
|
||||
|
||||
/** Signature of the promise-based DNS lookup from node:dns/promises. */
type LookupFn = typeof dnsLookup;

/** Textual prefixes of lower-cased IPv6 addresses that are treated as private. */
const PRIVATE_IPV6_PREFIXES = ["fe80:", "fec0:", "fc", "fd"];

/** Hostnames that are blocked by name (lower-case). */
const BLOCKED_HOSTNAMES = new Set(["localhost", "metadata.google.internal"]);
|
||||
|
||||
/**
 * Canonicalise a hostname for comparison: trim, lower-case, drop one trailing
 * dot, and unwrap a bracketed IPv6 literal such as `[::1]`.
 */
function normalizeHostname(hostname: string): string {
  const lowered = hostname.trim().toLowerCase();
  const host = lowered.replace(/\.$/, "");
  const bracketed = host.startsWith("[") && host.endsWith("]");
  return bracketed ? host.slice(1, -1) : host;
}
|
||||
|
||||
/**
 * Parse a dotted-decimal IPv4 address.
 *
 * Each of the four parts must consist solely of 1-3 decimal digits and be at
 * most 255. Parts with signs, whitespace, hex prefixes or trailing characters
 * are rejected rather than partially parsed.
 *
 * @returns The four octets, or `null` when `address` is not a valid IPv4 literal.
 */
function parseIpv4(address: string): number[] | null {
  const parts = address.split(".");
  if (parts.length !== 4) return null;

  const octets: number[] = [];
  for (const part of parts) {
    // Number.parseInt would silently accept inputs like "1abc" or "+4".
    if (!/^\d{1,3}$/.test(part)) return null;
    const octet = Number(part);
    if (octet > 255) return null;
    octets.push(octet);
  }
  return octets;
}
|
||||
|
||||
/**
 * Recover the IPv4 octets from the tail of an IPv4-mapped IPv6 address (the
 * part after `::ffff:`).
 *
 * Accepted forms: dotted decimal (`127.0.0.1`), two 16-bit hex groups
 * (`7f00:1`), or a single hex group holding the low bits.
 *
 * @returns The four octets, or `null` when the tail matches none of the forms.
 */
function parseIpv4FromMappedIpv6(mapped: string): number[] | null {
  if (mapped.includes(".")) return parseIpv4(mapped);

  // Split a 32-bit value into four octets, most significant first.
  const toOctets = (bits: number): number[] => [
    (bits >>> 24) & 0xff,
    (bits >>> 16) & 0xff,
    (bits >>> 8) & 0xff,
    bits & 0xff,
  ];

  const groups = mapped.split(":").filter(Boolean);
  const [first, second] = groups;

  if (groups.length === 1) {
    if (!first) return null;
    const bits = Number.parseInt(first, 16);
    const usable = !Number.isNaN(bits) && bits >= 0 && bits <= 0xffff_ffff;
    return usable ? toOctets(bits) : null;
  }

  if (groups.length !== 2 || !first || !second) return null;

  const high = Number.parseInt(first, 16);
  const low = Number.parseInt(second, 16);
  const inRange = (group: number): boolean =>
    !Number.isNaN(group) && group >= 0 && group <= 0xffff;
  if (!inRange(high) || !inRange(low)) return null;

  return toOctets((high << 16) + low);
}
|
||||
|
||||
/**
 * Decides whether an IPv4 address (given as octets) lies in a range that must
 * not be reached: 0.0.0.0/8, 10.0.0.0/8, 127.0.0.0/8, 169.254.0.0/16,
 * 172.16.0.0/12, 192.168.0.0/16 or 100.64.0.0/10.
 *
 * @param parts Octets of the address; only the first two are inspected.
 * @returns `true` for a blocked range, `false` otherwise (including when
 *   fewer than two octets are supplied).
 */
function isPrivateIpv4(parts: number[]): boolean {
  const [first, second] = parts;
  if (first === undefined || second === undefined) return false;

  switch (first) {
    case 0:
    case 10:
    case 127:
      return true;
    case 100:
      return second >= 64 && second <= 127;
    case 169:
      return second === 254;
    case 172:
      return second >= 16 && second <= 31;
    case 192:
      return second === 168;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Reports whether a textual IP address is private, loopback, link-local or
 * otherwise internal.
 *
 * Handles bracketed IPv6 literals, IPv4-mapped IPv6 (`::ffff:…`), the IPv6
 * unspecified/loopback addresses, the prefixes in `PRIVATE_IPV6_PREFIXES`,
 * and dotted IPv4. Input that is not recognizable as an IP yields `false`.
 *
 * @param address Address text; surrounding whitespace and case are ignored.
 * @returns `true` when the address must be blocked.
 */
export function isPrivateIpAddress(address: string): boolean {
  const mappedPrefix = "::ffff:";
  const lowered = address.trim().toLowerCase();
  const candidate =
    lowered.startsWith("[") && lowered.endsWith("]") ? lowered.slice(1, -1) : lowered;
  if (candidate === "") return false;

  if (candidate.startsWith(mappedPrefix)) {
    const embedded = parseIpv4FromMappedIpv6(candidate.slice(mappedPrefix.length));
    // An undecodable tail falls through to the generic IPv6 checks below.
    if (embedded) return isPrivateIpv4(embedded);
  }

  if (!candidate.includes(":")) {
    const octets = parseIpv4(candidate);
    return octets ? isPrivateIpv4(octets) : false;
  }

  if (candidate === "::" || candidate === "::1") return true;
  for (const prefix of PRIVATE_IPV6_PREFIXES) {
    if (candidate.startsWith(prefix)) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Reports whether a hostname is denied by name alone: it is listed in
 * `BLOCKED_HOSTNAMES` or ends in `.localhost`, `.local` or `.internal`.
 *
 * @param hostname Hostname to test; normalized before comparison.
 * @returns `true` when the name is blocked, `false` otherwise (including for
 *   an empty name).
 */
export function isBlockedHostname(hostname: string): boolean {
  const host = normalizeHostname(hostname);
  if (!host) return false;
  if (BLOCKED_HOSTNAMES.has(host)) return true;

  const blockedSuffixes = [".localhost", ".local", ".internal"];
  return blockedSuffixes.some((suffix) => host.endsWith(suffix));
}
|
||||
|
||||
/**
 * Builds a `dns.lookup`-compatible function that answers queries for one
 * hostname from a fixed ("pinned") list of addresses, so the addresses that
 * were validated are the ones actually connected to.
 *
 * Queries for any other hostname are forwarded to `fallback`.
 * For the pinned hostname:
 *  - a requested family (4 or 6) filters the records, falling back to all
 *    records if none match;
 *  - `{ all: true }` returns every usable record;
 *  - otherwise records are handed out round-robin across calls.
 * If there are no pinned addresses at all the callback receives an
 * `ENOTFOUND` error instead of never being invoked.
 *
 * @param params.hostname Hostname the pinned addresses belong to.
 * @param params.addresses Pre-validated IP addresses for that hostname.
 * @param params.fallback Lookup used for other hostnames (defaults to `dnsLookupCb`).
 * @returns A lookup function suitable for a connector's `lookup` option.
 */
export function createPinnedLookup(params: {
  hostname: string;
  addresses: string[];
  fallback?: typeof dnsLookupCb;
}): typeof dnsLookupCb {
  const normalizedHost = normalizeHostname(params.hostname);
  const fallback = params.fallback ?? dnsLookupCb;
  // The overloaded dns.lookup type is awkward to call generically; narrow it
  // to the two call shapes used below.
  const fallbackLookup = fallback as unknown as (
    hostname: string,
    callback: LookupCallback,
  ) => void;
  const fallbackWithOptions = fallback as unknown as (
    hostname: string,
    options: unknown,
    callback: LookupCallback,
  ) => void;
  const records = params.addresses.map((address) => ({
    address,
    family: address.includes(":") ? 6 : 4,
  }));
  let index = 0;

  return ((host: string, options?: unknown, callback?: unknown) => {
    // dns.lookup allows (host, cb) as well as (host, options, cb).
    const cb: LookupCallback =
      typeof options === "function" ? (options as LookupCallback) : (callback as LookupCallback);
    if (!cb) return;

    const normalized = normalizeHostname(host);
    if (!normalized || normalized !== normalizedHost) {
      if (typeof options === "function" || options === undefined) {
        return fallbackLookup(host, cb);
      }
      return fallbackWithOptions(host, options, cb);
    }

    const opts =
      typeof options === "object" && options !== null
        ? (options as { all?: boolean; family?: number })
        : {};
    const requestedFamily =
      typeof options === "number" ? options : typeof opts.family === "number" ? opts.family : 0;
    const candidates =
      requestedFamily === 4 || requestedFamily === 6
        ? records.filter((entry) => entry.family === requestedFamily)
        : records;
    const usable = candidates.length > 0 ? candidates : records;

    if (usable.length === 0) {
      // Always complete the lookup: a callback that is never called would
      // leave the pending connection attempt hanging.
      const notFound: NodeJS.ErrnoException = Object.assign(
        new Error(`No pinned addresses available for ${host}`),
        { code: "ENOTFOUND", hostname: host },
      );
      cb(notFound, opts.all ? [] : "");
      return;
    }

    if (opts.all) {
      cb(null, usable as LookupAddress[]);
      return;
    }

    const chosen = usable[index % usable.length];
    index += 1;
    if (chosen) {
      cb(null, chosen.address, chosen.family);
    }
  }) as typeof dnsLookupCb;
}
|
||||
|
||||
/** Result of resolving and validating a hostname for a pinned connection. */
export type PinnedHostname = {
  /** Normalized hostname that was resolved. */
  hostname: string;
  /** De-duplicated addresses the hostname resolved to. */
  addresses: string[];
  /** Lookup function that answers for `hostname` from `addresses`. */
  lookup: typeof dnsLookupCb;
};
|
||||
|
||||
/**
 * Resolves a hostname, verifies that neither the name nor any resolved
 * address is internal, and returns the addresses together with a lookup
 * function pinned to them.
 *
 * @param hostname Hostname (or IP literal) taken from the request URL.
 * @param lookupFn DNS resolver; injectable, defaults to `dnsLookup`.
 * @returns The normalized hostname, its unique addresses and a pinned lookup.
 * @throws SsrfBlockedError When the name is blocked, is a private IP literal,
 *   or resolves to at least one private address.
 * @throws Error When the hostname is empty or does not resolve.
 */
export async function resolvePinnedHostname(
  hostname: string,
  lookupFn: LookupFn = dnsLookup,
): Promise<PinnedHostname> {
  const host = normalizeHostname(hostname);
  if (!host) throw new Error("Invalid hostname");

  // Cheap name-based and literal-based checks come before any DNS traffic.
  if (isBlockedHostname(host)) {
    throw new SsrfBlockedError(`Blocked hostname: ${hostname}`);
  }
  if (isPrivateIpAddress(host)) {
    throw new SsrfBlockedError("Blocked: private/internal IP address");
  }

  const resolved = await lookupFn(host, { all: true });
  if (resolved.length === 0) {
    throw new Error(`Unable to resolve hostname: ${hostname}`);
  }

  // One internal record is enough to reject the whole hostname.
  const hitsInternal = resolved.some((record) => isPrivateIpAddress(record.address));
  if (hitsInternal) {
    throw new SsrfBlockedError("Blocked: resolves to private/internal IP address");
  }

  const addresses = Array.from(new Set(resolved.map((record) => record.address)));
  if (addresses.length === 0) {
    throw new Error(`Unable to resolve hostname: ${hostname}`);
  }

  const lookup = createPinnedLookup({ hostname: host, addresses });
  return { hostname: host, addresses, lookup };
}
|
||||
|
||||
/**
 * Creates an undici `Agent` whose connections resolve hostnames through the
 * pinned lookup.
 *
 * @param pinned Result of `resolvePinnedHostname`.
 * @returns A dispatcher to pass to `fetch`.
 */
export function createPinnedDispatcher(pinned: PinnedHostname): Dispatcher {
  const { lookup } = pinned;
  return new Agent({ connect: { lookup } });
}
|
||||
|
||||
/**
 * Best-effort shutdown of a dispatcher: calls `close()` when available,
 * otherwise `destroy()`. Cleanup errors are deliberately ignored.
 *
 * @param dispatcher Dispatcher to release; `null`/`undefined` is a no-op.
 */
export async function closeDispatcher(dispatcher?: Dispatcher | null): Promise<void> {
  if (!dispatcher) return;

  const handle = dispatcher as { close?: () => Promise<void> | void; destroy?: () => void };
  try {
    if (typeof handle.close === "function") {
      await handle.close();
    } else if (typeof handle.destroy === "function") {
      handle.destroy();
    }
  } catch {
    // ignore dispatcher cleanup errors
  }
}
|
||||
335
src/agent/tools/web/web-fetch.ts
Normal file
335
src/agent/tools/web/web-fetch.ts
Normal file
|
|
@ -0,0 +1,335 @@
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
import type { Dispatcher } from "undici";
|
||||
|
||||
import {
|
||||
closeDispatcher,
|
||||
createPinnedDispatcher,
|
||||
resolvePinnedHostname,
|
||||
SsrfBlockedError,
|
||||
} from "./ssrf.js";
|
||||
import {
|
||||
DEFAULT_CACHE_TTL_MINUTES,
|
||||
DEFAULT_TIMEOUT_SECONDS,
|
||||
normalizeCacheKey,
|
||||
readCache,
|
||||
readResponseText,
|
||||
resolveCacheTtlMs,
|
||||
resolveTimeoutSeconds,
|
||||
withTimeout,
|
||||
writeCache,
|
||||
} from "./cache.js";
|
||||
import type { CacheEntry } from "./cache.js";
|
||||
import { extractContent, markdownToText, truncateText, type ExtractMode, type ExtractorType } from "./html-utils.js";
|
||||
import { jsonResult, readNumberParam, readStringParam } from "./param-helpers.js";
|
||||
|
||||
/** Output formats named in the tool schema. */
const EXTRACT_MODES = ["markdown", "text"] as const;
/** Extraction strategies named in the tool schema. */
const EXTRACTOR_TYPES = ["readability", "turndown"] as const;

/** Default cap on returned characters. */
const DEFAULT_FETCH_MAX_CHARS = 50_000;
/** Default number of redirects followed before giving up. */
const DEFAULT_FETCH_MAX_REDIRECTS = 3;
/** Cap on the error-body excerpt included in failure messages. */
const DEFAULT_ERROR_MAX_CHARS = 4_000;
/** User-Agent header sent with every fetch. */
const DEFAULT_FETCH_USER_AGENT =
  "Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36";

/** Module-level cache of fetch results, keyed by normalized request parameters. */
const FETCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
|
||||
|
||||
/** Parameter schema advertised for the `web_fetch` tool. */
const WebFetchSchema = Type.Object({
  url: Type.String({ description: "HTTP or HTTPS URL to fetch." }),
  extractMode: Type.Optional(
    Type.String({
      description: 'Output format: "markdown" (default) or "text" (plain text).',
    }),
  ),
  extractor: Type.Optional(
    Type.String({
      description:
        'Extraction method: "readability" (default, smart extraction of main content) or "turndown" (convert entire page).',
    }),
  ),
  maxChars: Type.Optional(
    Type.Number({
      description: "Maximum characters to return (truncates when exceeded). Default: 50000.",
      minimum: 100,
    }),
  ),
});
|
||||
|
||||
/** Raw arguments of a `web_fetch` call, before validation and defaulting. */
type WebFetchArgs = {
  url: string;
  extractMode?: string;
  extractor?: string;
  maxChars?: number;
};
|
||||
|
||||
/** Payload returned by a successful `web_fetch` call. */
export type WebFetchResult = {
  /** URL as requested by the caller. */
  url: string;
  /** URL that produced the response after following redirects. */
  finalUrl: string;
  status: number;
  contentType: string;
  title?: string;
  extractMode: ExtractMode;
  /** Extraction path used: an HTML extractor, pretty-printed JSON, or the raw body. */
  extractor: ExtractorType | "raw" | "json";
  truncated: boolean;
  /** Length of `text` after truncation. */
  length: number;
  /** ISO-8601 timestamp taken when the payload was built. */
  fetchedAt: string;
  tookMs: number;
  text: string;
  /** Set to `true` when the payload was served from the cache. */
  cached?: boolean;
};
|
||||
|
||||
/**
 * Picks the character limit: `value` when it is a finite number, otherwise
 * `fallback`; the result is floored and never lower than 100.
 *
 * @param value Caller-supplied limit of unknown type.
 * @param fallback Limit used when `value` is unusable.
 */
function resolveMaxChars(value: unknown, fallback: number): number {
  let limit = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    limit = value;
  }
  return Math.max(100, Math.floor(limit));
}
|
||||
|
||||
/**
 * Picks the redirect limit: `value` when it is a finite number, otherwise
 * `fallback`; the result is floored and never negative.
 *
 * @param value Caller-supplied limit of unknown type.
 * @param fallback Limit used when `value` is unusable.
 */
function resolveMaxRedirects(value: unknown, fallback: number): number {
  let limit = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    limit = value;
  }
  return Math.max(0, Math.floor(limit));
}
|
||||
|
||||
/**
 * Heuristic HTML sniffing: after skipping leading whitespace, does the text
 * begin (case-insensitively) with `<!doctype html` or `<html`?
 *
 * @param value Body text to inspect.
 */
function looksLikeHtml(value: string): boolean {
  // Only the first 256 characters are lower-cased; the markers sit at the very start.
  const prefix = value.trimStart().slice(0, 256).toLowerCase();
  const markers = ["<!doctype html", "<html"];
  return markers.some((marker) => prefix.startsWith(marker));
}
|
||||
|
||||
/** Returns `true` for the HTTP redirect statuses that are followed: 301, 302, 303, 307, 308. */
function isRedirectStatus(status: number): boolean {
  const redirectStatuses = [301, 302, 303, 307, 308];
  return redirectStatuses.includes(status);
}
|
||||
|
||||
/**
 * Performs a GET request and follows redirects manually so that every hop is
 * re-validated (scheme check + `resolvePinnedHostname`) before connecting.
 *
 * A fresh pinned dispatcher is created per hop. Dispatchers of intermediate
 * hops, and of any hop that fails, are closed here; the dispatcher of the
 * final response is returned and must be closed by the caller once the body
 * has been consumed.
 *
 * @param params.url Initial URL.
 * @param params.maxRedirects Maximum number of redirects to follow.
 * @param params.timeoutSeconds Timeout passed to `withTimeout` for the whole chain.
 * @param params.userAgent Value of the `User-Agent` header.
 * @returns The final response, the URL it came from, and its open dispatcher.
 * @throws Error On an invalid or non-HTTP(S) URL, a redirect without a usable
 *   `Location`, too many redirects, or a redirect loop. Errors from
 *   `resolvePinnedHostname` and `fetch` propagate unchanged.
 */
async function fetchWithRedirects(params: {
  url: string;
  maxRedirects: number;
  timeoutSeconds: number;
  userAgent: string;
}): Promise<{ response: Response; finalUrl: string; dispatcher: Dispatcher }> {
  const signal = withTimeout(undefined, params.timeoutSeconds * 1000);
  const visited = new Set<string>();
  let currentUrl = params.url;
  let redirectCount = 0;

  while (true) {
    let parsedUrl: URL;
    try {
      parsedUrl = new URL(currentUrl);
    } catch {
      throw new Error("Invalid URL: must be http or https");
    }
    if (!["http:", "https:"].includes(parsedUrl.protocol)) {
      throw new Error("Invalid URL: must be http or https");
    }
    // Record every URL we request, including the first one, so a redirect
    // back to the starting URL is recognized as a loop immediately.
    visited.add(parsedUrl.toString());

    const pinned = await resolvePinnedHostname(parsedUrl.hostname);
    const dispatcher = createPinnedDispatcher(pinned);
    // Set only when ownership of the dispatcher passes to the caller.
    let keepDispatcherOpen = false;
    try {
      // Use undici's dispatcher for SSRF protection
      const res = await fetch(parsedUrl.toString(), {
        method: "GET",
        headers: {
          Accept: "*/*",
          "User-Agent": params.userAgent,
          "Accept-Language": "en-US,en;q=0.9",
        },
        signal,
        redirect: "manual",
        dispatcher,
      } as unknown as RequestInit);

      if (!isRedirectStatus(res.status)) {
        keepDispatcherOpen = true;
        return { response: res, finalUrl: currentUrl, dispatcher };
      }

      // The redirect body is never read; release it on every path.
      void res.body?.cancel().catch(() => undefined);

      const location = res.headers.get("location");
      if (!location) {
        throw new Error(`Redirect missing location header (${res.status})`);
      }
      redirectCount += 1;
      if (redirectCount > params.maxRedirects) {
        throw new Error(`Too many redirects (limit: ${params.maxRedirects})`);
      }

      let nextUrl: string;
      try {
        nextUrl = new URL(location, parsedUrl).toString();
      } catch {
        throw new Error(`Redirect has invalid location header (${res.status})`);
      }
      if (visited.has(nextUrl)) {
        throw new Error("Redirect loop detected");
      }
      currentUrl = nextUrl;
    } finally {
      // Covers fetch failures, every redirect error above, and normal hops.
      if (!keepDispatcherOpen) {
        await closeDispatcher(dispatcher);
      }
    }
  }
}
|
||||
|
||||
/**
 * Turns an error response body into a short excerpt for an error message.
 * Bodies declared as `text/html`, or that look like HTML, are passed through
 * `markdownToText` first; the result is trimmed and truncated.
 *
 * @param params.detail Raw response body.
 * @param params.contentType Value of the `Content-Type` header, if any.
 * @param params.maxChars Maximum length of the excerpt.
 * @returns The excerpt, or `""` when the body is empty.
 */
function formatWebFetchErrorDetail(params: {
  detail: string;
  contentType?: string | null;
  maxChars: number;
}): string {
  if (!params.detail) return "";

  const declaredHtml = params.contentType?.toLowerCase().includes("text/html");
  const isHtml = declaredHtml || looksLikeHtml(params.detail);
  const readable = isHtml ? markdownToText(params.detail) : params.detail;

  return truncateText(readable.trim(), params.maxChars).text;
}
|
||||
|
||||
/**
 * Fetches a URL and converts the response into a `WebFetchResult`.
 *
 * Steps: serve from `FETCH_CACHE` when possible; validate the URL scheme;
 * fetch with manual redirect handling; then, by content type, extract HTML via
 * `extractContent`, pretty-print JSON, or return the body as-is; finally
 * truncate to `maxChars` and cache the payload.
 *
 * Content types are matched case-insensitively (media types are
 * case-insensitive); the original header value is reported in the payload.
 *
 * @param params Fully resolved fetch options (no defaults are applied here).
 * @returns The extracted payload; `cached: true` is added on a cache hit.
 * @throws Error On an invalid URL, a non-2xx response, or any fetch failure.
 */
async function runWebFetch(params: {
  url: string;
  extractMode: ExtractMode;
  extractor: ExtractorType;
  maxChars: number;
  maxRedirects: number;
  timeoutSeconds: number;
  cacheTtlMs: number;
  userAgent: string;
}): Promise<WebFetchResult> {
  const cacheKey = normalizeCacheKey(
    `fetch:${params.url}:${params.extractMode}:${params.extractor}:${params.maxChars}`,
  );
  const cached = readCache(FETCH_CACHE, cacheKey);
  if (cached) return { ...cached.value, cached: true } as WebFetchResult;

  let parsedUrl: URL;
  try {
    parsedUrl = new URL(params.url);
  } catch {
    throw new Error("Invalid URL: must be http or https");
  }
  if (!["http:", "https:"].includes(parsedUrl.protocol)) {
    throw new Error("Invalid URL: must be http or https");
  }

  const start = Date.now();
  const {
    response: res,
    finalUrl,
    dispatcher,
  } = await fetchWithRedirects({
    url: params.url,
    maxRedirects: params.maxRedirects,
    timeoutSeconds: params.timeoutSeconds,
    userAgent: params.userAgent,
  });

  // The dispatcher must stay open until the body has been read.
  try {
    if (!res.ok) {
      const rawDetail = await readResponseText(res);
      const detail = formatWebFetchErrorDetail({
        detail: rawDetail,
        contentType: res.headers.get("content-type"),
        maxChars: DEFAULT_ERROR_MAX_CHARS,
      });
      throw new Error(`Web fetch failed (${res.status}): ${detail || res.statusText}`);
    }

    const contentType = res.headers.get("content-type") ?? "application/octet-stream";
    // Lower-cased copy used only for matching.
    const contentTypeLower = contentType.toLowerCase();
    const body = await readResponseText(res);

    let title: string | undefined;
    let extractor: ExtractorType | "raw" | "json" = "raw";
    let text = body;

    if (contentTypeLower.includes("text/html")) {
      const extracted = await extractContent({
        html: body,
        url: finalUrl,
        extractMode: params.extractMode,
        extractor: params.extractor,
      });
      text = extracted.text;
      title = extracted.title;
      extractor = extracted.extractor;
    } else if (contentTypeLower.includes("application/json")) {
      try {
        text = JSON.stringify(JSON.parse(body), null, 2);
        extractor = "json";
      } catch {
        // Mislabelled or malformed JSON: fall back to the raw body.
        text = body;
        extractor = "raw";
      }
    }

    const truncated = truncateText(text, params.maxChars);
    const payload: WebFetchResult = {
      url: params.url,
      finalUrl,
      status: res.status,
      contentType,
      extractMode: params.extractMode,
      extractor,
      truncated: truncated.truncated,
      length: truncated.text.length,
      fetchedAt: new Date().toISOString(),
      tookMs: Date.now() - start,
      text: truncated.text,
    };
    if (title) {
      payload.title = title;
    }
    writeCache(FETCH_CACHE, cacheKey, payload, params.cacheTtlMs);
    return payload;
  } finally {
    await closeDispatcher(dispatcher);
  }
}
|
||||
|
||||
/**
 * Creates the `web_fetch` agent tool.
 *
 * The tool reads and defaults its arguments, delegates to `runWebFetch`, and
 * reports failures as JSON results (`ssrf_blocked` for SSRF rejections,
 * `fetch_failed` for everything else) instead of throwing. Argument reading
 * happens outside the `try`, so errors raised by the parameter helpers
 * propagate to the caller.
 *
 * Timeout and redirect limits go through `resolveTimeoutSeconds` and
 * `resolveMaxRedirects`, matching how the `web_search` tool resolves its defaults.
 */
export function createWebFetchTool(): AgentTool<typeof WebFetchSchema, unknown> {
  return {
    name: "web_fetch",
    label: "Web Fetch",
    description:
      'Fetch and extract readable content from a URL. Converts HTML to markdown or plain text. Use extractor="readability" for smart article extraction, or "turndown" for full page conversion.',
    parameters: WebFetchSchema,
    execute: async (_toolCallId, args) => {
      const params = args as WebFetchArgs;
      const url = readStringParam(params as Record<string, unknown>, "url", { required: true });
      const extractModeRaw = readStringParam(params as Record<string, unknown>, "extractMode");
      // Anything other than an explicit "text" falls back to markdown.
      const extractMode: ExtractMode = extractModeRaw === "text" ? "text" : "markdown";
      const extractorRaw = readStringParam(params as Record<string, unknown>, "extractor");
      // Anything other than an explicit "turndown" falls back to readability.
      const extractor: ExtractorType = extractorRaw === "turndown" ? "turndown" : "readability";
      const maxChars = readNumberParam(params as Record<string, unknown>, "maxChars", {
        integer: true,
      });

      try {
        const result = await runWebFetch({
          url,
          extractMode,
          extractor,
          maxChars: resolveMaxChars(maxChars, DEFAULT_FETCH_MAX_CHARS),
          maxRedirects: resolveMaxRedirects(
            DEFAULT_FETCH_MAX_REDIRECTS,
            DEFAULT_FETCH_MAX_REDIRECTS,
          ),
          timeoutSeconds: resolveTimeoutSeconds(DEFAULT_TIMEOUT_SECONDS, DEFAULT_TIMEOUT_SECONDS),
          cacheTtlMs: resolveCacheTtlMs(DEFAULT_CACHE_TTL_MINUTES, DEFAULT_CACHE_TTL_MINUTES),
          userAgent: DEFAULT_FETCH_USER_AGENT,
        });
        return jsonResult(result);
      } catch (error) {
        if (error instanceof SsrfBlockedError) {
          return jsonResult({
            error: "ssrf_blocked",
            message: error.message,
          });
        }
        return jsonResult({
          error: "fetch_failed",
          message: error instanceof Error ? error.message : String(error),
        });
      }
    },
  };
}
|
||||
451
src/agent/tools/web/web-search.ts
Normal file
451
src/agent/tools/web/web-search.ts
Normal file
|
|
@ -0,0 +1,451 @@
|
|||
import { Type } from "@sinclair/typebox";
|
||||
import type { AgentTool } from "@mariozechner/pi-agent-core";
|
||||
|
||||
import {
|
||||
DEFAULT_CACHE_TTL_MINUTES,
|
||||
DEFAULT_TIMEOUT_SECONDS,
|
||||
normalizeCacheKey,
|
||||
readCache,
|
||||
readResponseText,
|
||||
resolveCacheTtlMs,
|
||||
resolveTimeoutSeconds,
|
||||
withTimeout,
|
||||
writeCache,
|
||||
} from "./cache.js";
|
||||
import type { CacheEntry } from "./cache.js";
|
||||
import { jsonResult, readNumberParam, readStringParam } from "./param-helpers.js";
|
||||
|
||||
/** Supported search backends. */
const SEARCH_PROVIDERS = ["brave", "perplexity"] as const;
type SearchProvider = (typeof SEARCH_PROVIDERS)[number];

/** Result count used when the caller gives none. */
const DEFAULT_SEARCH_COUNT = 5;
/** Upper bound on the result count. */
const MAX_SEARCH_COUNT = 10;

const BRAVE_SEARCH_ENDPOINT = "https://api.search.brave.com/res/v1/web/search";
/** Base URL used unless the API key identifies a direct Perplexity key. */
const DEFAULT_PERPLEXITY_BASE_URL = "https://openrouter.ai/api/v1";
const PERPLEXITY_DIRECT_BASE_URL = "https://api.perplexity.ai";
const DEFAULT_PERPLEXITY_MODEL = "perplexity/sonar-pro";
/** Key prefixes (lower-case) that identify a direct Perplexity API key. */
const PERPLEXITY_KEY_PREFIXES = ["pplx-"];
/** Key prefixes (lower-case) that identify an OpenRouter API key. */
const OPENROUTER_KEY_PREFIXES = ["sk-or-"];

/** Module-level cache of search payloads, keyed by normalized query parameters. */
const SEARCH_CACHE = new Map<string, CacheEntry<Record<string, unknown>>>();
/** Freshness shortcuts accepted for Brave: past day / week / month / year. */
const BRAVE_FRESHNESS_SHORTCUTS = new Set(["pd", "pw", "pm", "py"]);
/** Explicit freshness range of the form `YYYY-MM-DDtoYYYY-MM-DD`. */
const BRAVE_FRESHNESS_RANGE = /^(\d{4}-\d{2}-\d{2})to(\d{4}-\d{2}-\d{2})$/;
|
||||
|
||||
/** Parameter schema advertised for the `web_search` tool. */
const WebSearchSchema = Type.Object({
  query: Type.String({ description: "Search query string." }),
  provider: Type.Optional(
    Type.String({
      description:
        'Search provider: "brave" (default, traditional search results) or "perplexity" (AI-synthesized answers).',
    }),
  ),
  count: Type.Optional(
    Type.Number({
      description: "Number of results to return (1-10). Default: 5. Brave only.",
      minimum: 1,
      maximum: MAX_SEARCH_COUNT,
    }),
  ),
  country: Type.Optional(
    Type.String({
      description:
        "2-letter country code for region-specific results (e.g., 'DE', 'US'). Default: 'US'.",
    }),
  ),
  freshness: Type.Optional(
    Type.String({
      description:
        "Filter results by time (Brave only): 'pd' (past day), 'pw' (past week), 'pm' (past month), 'py' (past year), or 'YYYY-MM-DDtoYYYY-MM-DD'.",
    }),
  ),
});
|
||||
|
||||
/** Raw arguments of a `web_search` call, before validation and defaulting. */
type WebSearchArgs = {
  query: string;
  provider?: string;
  count?: number;
  country?: string;
  freshness?: string;
};
|
||||
|
||||
/** Fields read from a single Brave web result; all are treated as optional. */
type BraveSearchResult = {
  title?: string;
  url?: string;
  description?: string;
  age?: string;
};

/** Portion of the Brave Search response body that this module consumes. */
type BraveSearchResponse = {
  web?: {
    results?: BraveSearchResult[];
  };
};
|
||||
|
||||
/** Portion of the chat-completions response body that this module consumes. */
type PerplexitySearchResponse = {
  choices?: Array<{
    message?: {
      content?: string;
    };
  }>;
  citations?: string[];
};
|
||||
|
||||
/**
 * Payload of a successful search: fields common to both providers,
 * intersected with the provider-specific part.
 */
export type WebSearchResult = {
  query: string;
  provider: SearchProvider;
  tookMs: number;
  /** Set to `true` when the payload was served from the cache. */
  cached?: boolean;
} & (
  | {
      // Brave result
      count: number;
      results: Array<{
        title: string;
        url: string;
        description: string;
        published?: string;
        siteName?: string;
      }>;
    }
  | {
      // Perplexity result
      model: string;
      content: string;
      citations: string[];
    }
);
|
||||
|
||||
/**
 * Picks the result count: `value` when it is a finite number, otherwise
 * `fallback`; the result is floored and clamped to 1..`MAX_SEARCH_COUNT`.
 *
 * @param value Caller-supplied count of unknown type.
 * @param fallback Count used when `value` is unusable.
 */
function resolveSearchCount(value: unknown, fallback: number): number {
  let requested = fallback;
  if (typeof value === "number" && Number.isFinite(value)) {
    requested = value;
  }
  const capped = Math.min(MAX_SEARCH_COUNT, Math.floor(requested));
  return Math.max(1, capped);
}
|
||||
|
||||
/**
 * Validates and canonicalizes a Brave `freshness` value.
 *
 * Accepts a shortcut from `BRAVE_FRESHNESS_SHORTCUTS` (case-insensitive,
 * returned lower-case) or a `YYYY-MM-DDtoYYYY-MM-DD` range whose dates are
 * real calendar dates and whose start is not after its end.
 *
 * @param value Raw freshness argument.
 * @returns The canonical value, or `undefined` when absent or invalid.
 */
function normalizeFreshness(value: string | undefined): string | undefined {
  const trimmed = value?.trim();
  if (!trimmed) return undefined;

  const shortcut = trimmed.toLowerCase();
  if (BRAVE_FRESHNESS_SHORTCUTS.has(shortcut)) return shortcut;

  const range = BRAVE_FRESHNESS_RANGE.exec(trimmed);
  if (!range) return undefined;

  const [, from, until] = range;
  if (!from || !until) return undefined;
  if (!(isValidIsoDate(from) && isValidIsoDate(until))) return undefined;
  // ISO dates order lexicographically, so string comparison is enough.
  if (from > until) return undefined;

  return `${from}to${until}`;
}
|
||||
|
||||
/**
 * Checks that a string is a `YYYY-MM-DD` date naming a real calendar day.
 * The components are round-tripped through a UTC `Date`; if any of them
 * changes (e.g. Feb 30 rolling over), the date is rejected.
 *
 * @param value Candidate date string.
 */
function isValidIsoDate(value: string): boolean {
  const matched = /^(\d{4})-(\d{2})-(\d{2})$/.exec(value);
  if (!matched) return false;

  const year = Number(matched[1]);
  const month = Number(matched[2]);
  const day = Number(matched[3]);

  const probe = new Date(Date.UTC(year, month - 1, day));
  if (probe.getUTCFullYear() !== year) return false;
  if (probe.getUTCMonth() !== month - 1) return false;
  return probe.getUTCDate() === day;
}
|
||||
|
||||
/**
 * Derives a site name from a result URL by taking its hostname.
 *
 * @param url Result URL, possibly missing.
 * @returns The hostname, or `undefined` when the URL is absent, empty or unparsable.
 */
function resolveSiteName(url: string | undefined): string | undefined {
  if (url === undefined || url === "") return undefined;

  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return undefined;
  }
  return parsed.hostname;
}
|
||||
|
||||
/**
 * Chooses the API base URL from the shape of the key: keys carrying a
 * direct-Perplexity prefix use the Perplexity endpoint; every other key
 * (OpenRouter-prefixed or unrecognized) uses the default endpoint.
 *
 * @param apiKey API key; the prefix comparison is case-insensitive.
 */
function inferPerplexityBaseUrl(apiKey: string): string {
  const key = apiKey.toLowerCase();
  const isDirectKey = PERPLEXITY_KEY_PREFIXES.some((prefix) => key.startsWith(prefix));
  return isDirectKey ? PERPLEXITY_DIRECT_BASE_URL : DEFAULT_PERPLEXITY_BASE_URL;
}
|
||||
|
||||
/**
 * Looks up the API key for Perplexity-style search in the environment.
 * `PERPLEXITY_API_KEY` wins over `OPENROUTER_API_KEY`; blank values are ignored.
 *
 * @returns The trimmed key and the variable it came from, or
 *   `{ apiKey: null, source: "none" }` when neither is set.
 */
function resolvePerplexityApiKey(): { apiKey: string; source: string } | { apiKey: null; source: "none" } {
  // Listed in priority order.
  const envVars = ["PERPLEXITY_API_KEY", "OPENROUTER_API_KEY"];
  for (const source of envVars) {
    const apiKey = (process.env[source] ?? "").trim();
    if (apiKey) return { apiKey, source };
  }
  return { apiKey: null, source: "none" };
}
|
||||
|
||||
/** Returns the trimmed `BRAVE_API_KEY` from the environment, or `undefined` when unset or blank. */
function resolveBraveApiKey(): string | undefined {
  const key = process.env.BRAVE_API_KEY?.trim();
  return key ? key : undefined;
}
|
||||
|
||||
/**
 * Decides which search provider to use.
 *
 * An explicit request for "brave" or "perplexity" is honoured; the comparison
 * ignores case and surrounding whitespace so that values such as
 * "Perplexity" are not silently replaced by auto-detection. Otherwise the
 * provider is inferred from the available API keys (Brave first, then
 * Perplexity/OpenRouter), defaulting to Brave when no key is configured.
 *
 * @param requested Provider named by the caller, if any.
 * @returns The provider to query.
 */
function resolveProvider(requested?: string): SearchProvider {
  const wanted = requested?.trim().toLowerCase();
  if (wanted === "perplexity") return "perplexity";
  if (wanted === "brave") return "brave";

  // Auto-detect based on available API keys
  if (resolveBraveApiKey()) return "brave";
  if (resolvePerplexityApiKey().apiKey) return "perplexity";

  // Default to brave
  return "brave";
}
|
||||
|
||||
/**
 * Sends the query as a single user message to the `/chat/completions`
 * endpoint under `baseUrl` and returns the answer with its citations.
 *
 * @param params.query Search query, sent verbatim as the message content.
 * @param params.apiKey Bearer token for the endpoint.
 * @param params.baseUrl API base URL; one trailing slash is tolerated.
 * @param params.model Model identifier to request.
 * @param params.timeoutSeconds Request timeout.
 * @returns The first choice's content (or "No response") and the citation list.
 * @throws Error When the endpoint answers with a non-2xx status.
 */
async function runPerplexitySearch(params: {
  query: string;
  apiKey: string;
  baseUrl: string;
  model: string;
  timeoutSeconds: number;
}): Promise<{ content: string; citations: string[] }> {
  const base = params.baseUrl.replace(/\/$/, "");
  const endpoint = `${base}/chat/completions`;

  const headers = {
    "Content-Type": "application/json",
    Authorization: `Bearer ${params.apiKey}`,
    "HTTP-Referer": "https://multica.ai",
    "X-Title": "Multica Web Search",
  };
  const requestBody = {
    model: params.model,
    messages: [{ role: "user", content: params.query }],
  };

  const res = await fetch(endpoint, {
    method: "POST",
    headers,
    body: JSON.stringify(requestBody),
    signal: withTimeout(undefined, params.timeoutSeconds * 1000),
  });

  if (!res.ok) {
    const detail = await readResponseText(res);
    throw new Error(`Perplexity API error (${res.status}): ${detail || res.statusText}`);
  }

  const data = (await res.json()) as PerplexitySearchResponse;
  return {
    content: data.choices?.[0]?.message?.content ?? "No response",
    citations: data.citations ?? [],
  };
}
|
||||
|
||||
/**
 * Queries the Brave Search web endpoint and maps the response to a flat
 * result list.
 *
 * @param params.query Search query.
 * @param params.count Number of results to request.
 * @param params.apiKey Brave subscription token.
 * @param params.timeoutSeconds Request timeout.
 * @param params.country Optional country code, sent only when set.
 * @param params.freshness Optional freshness filter, sent only when set.
 * @returns Results with missing text fields defaulted to `""`; `published`
 *   and `siteName` are present only when they could be derived.
 * @throws Error When the endpoint answers with a non-2xx status.
 */
async function runBraveSearch(params: {
  query: string;
  count: number;
  apiKey: string;
  timeoutSeconds: number;
  country: string | undefined;
  freshness: string | undefined;
}): Promise<{
  results: Array<{
    title: string;
    url: string;
    description: string;
    published?: string;
    siteName?: string;
  }>;
}> {
  const requestUrl = new URL(BRAVE_SEARCH_ENDPOINT);
  const query = requestUrl.searchParams;
  query.set("q", params.query);
  query.set("count", String(params.count));
  if (params.country) query.set("country", params.country);
  if (params.freshness) query.set("freshness", params.freshness);

  const res = await fetch(requestUrl.toString(), {
    method: "GET",
    headers: {
      Accept: "application/json",
      "X-Subscription-Token": params.apiKey,
    },
    signal: withTimeout(undefined, params.timeoutSeconds * 1000),
  });

  if (!res.ok) {
    const detail = await readResponseText(res);
    throw new Error(`Brave Search API error (${res.status}): ${detail || res.statusText}`);
  }

  const data = (await res.json()) as BraveSearchResponse;
  const webResults = data.web?.results;
  const entries = Array.isArray(webResults) ? webResults : [];

  const results = entries.map((entry) => {
    const siteName = resolveSiteName(entry.url);
    return {
      title: entry.title ?? "",
      url: entry.url ?? "",
      description: entry.description ?? "",
      // Optional fields are omitted entirely rather than set to undefined.
      ...(entry.age ? { published: entry.age } : {}),
      ...(siteName ? { siteName } : {}),
    };
  });
  return { results };
}
|
||||
|
||||
/**
 * Runs a search against the selected provider, with caching.
 *
 * A cache hit is returned with `cached: true`. A missing API key yields a
 * `missing_api_key` error payload (which is not cached) instead of an exception.
 *
 * @param params Fully resolved search options (no defaults are applied here).
 * @returns The provider payload or an error payload.
 * @throws Error Propagated from the provider call on HTTP or network failure.
 */
async function runWebSearch(params: {
  query: string;
  provider: SearchProvider;
  count: number;
  timeoutSeconds: number;
  cacheTtlMs: number;
  country: string | undefined;
  freshness: string | undefined;
}): Promise<Record<string, unknown>> {
  const cacheKey = normalizeCacheKey(
    `${params.provider}:${params.query}:${params.count}:${params.country || "default"}:${params.freshness || "default"}`,
  );
  const hit = readCache(SEARCH_CACHE, cacheKey);
  if (hit) return { ...hit.value, cached: true };

  const startedAt = Date.now();

  if (params.provider !== "perplexity") {
    // Brave search
    const braveKey = resolveBraveApiKey();
    if (!braveKey) {
      return {
        error: "missing_api_key",
        message: "Brave search requires BRAVE_API_KEY environment variable.",
      };
    }

    const { results } = await runBraveSearch({
      query: params.query,
      count: params.count,
      apiKey: braveKey,
      timeoutSeconds: params.timeoutSeconds,
      country: params.country,
      freshness: params.freshness,
    });

    const bravePayload = {
      query: params.query,
      provider: params.provider,
      count: results.length,
      tookMs: Date.now() - startedAt,
      results,
    };
    writeCache(SEARCH_CACHE, cacheKey, bravePayload, params.cacheTtlMs);
    return bravePayload;
  }

  const keyInfo = resolvePerplexityApiKey();
  if (!keyInfo.apiKey) {
    return {
      error: "missing_api_key",
      message:
        "Perplexity search requires PERPLEXITY_API_KEY or OPENROUTER_API_KEY environment variable.",
    };
  }

  const { content, citations } = await runPerplexitySearch({
    query: params.query,
    apiKey: keyInfo.apiKey,
    baseUrl: inferPerplexityBaseUrl(keyInfo.apiKey),
    model: DEFAULT_PERPLEXITY_MODEL,
    timeoutSeconds: params.timeoutSeconds,
  });

  const perplexityPayload = {
    query: params.query,
    provider: params.provider,
    model: DEFAULT_PERPLEXITY_MODEL,
    tookMs: Date.now() - startedAt,
    content,
    citations,
  };
  writeCache(SEARCH_CACHE, cacheKey, perplexityPayload, params.cacheTtlMs);
  return perplexityPayload;
}
|
||||
|
||||
/**
 * Creates the `web_search` agent tool.
 *
 * The tool reads its arguments, rejects `freshness` for non-Brave providers
 * and malformed `freshness` values with descriptive error results, then
 * delegates to `runWebSearch`. Failures during the search are reported as a
 * `search_failed` JSON result rather than thrown. Argument reading happens
 * outside the `try`, so errors raised by the parameter helpers propagate.
 */
export function createWebSearchTool(): AgentTool<typeof WebSearchSchema, unknown> {
  return {
    name: "web_search",
    label: "Web Search",
    description:
      'Search the web. Supports "brave" (traditional results with titles/URLs/snippets) and "perplexity" (AI-synthesized answers with citations). Provider auto-detected from available API keys if not specified.',
    parameters: WebSearchSchema,
    execute: async (_toolCallId, args) => {
      const input = args as WebSearchArgs as Record<string, unknown>;

      const query = readStringParam(input, "query", { required: true });
      const provider = resolveProvider(readStringParam(input, "provider"));
      const requestedCount = readNumberParam(input, "count", { integer: true });
      const count = requestedCount ?? DEFAULT_SEARCH_COUNT;
      const country = readStringParam(input, "country");
      const rawFreshness = readStringParam(input, "freshness");

      let freshness: string | undefined;
      if (rawFreshness) {
        if (provider !== "brave") {
          return jsonResult({
            error: "unsupported_parameter",
            message: "freshness parameter is only supported by the Brave search provider.",
          });
        }
        freshness = normalizeFreshness(rawFreshness);
        if (!freshness) {
          return jsonResult({
            error: "invalid_freshness",
            message:
              "freshness must be one of: pd (past day), pw (past week), pm (past month), py (past year), or YYYY-MM-DDtoYYYY-MM-DD.",
          });
        }
      }

      try {
        const result = await runWebSearch({
          query,
          provider,
          count: resolveSearchCount(count, DEFAULT_SEARCH_COUNT),
          timeoutSeconds: resolveTimeoutSeconds(DEFAULT_TIMEOUT_SECONDS, DEFAULT_TIMEOUT_SECONDS),
          cacheTtlMs: resolveCacheTtlMs(DEFAULT_CACHE_TTL_MINUTES, DEFAULT_CACHE_TTL_MINUTES),
          country,
          freshness,
        });
        return jsonResult(result);
      } catch (error) {
        return jsonResult({
          error: "search_failed",
          message: error instanceof Error ? error.message : String(error),
        });
      }
    },
  };
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue