From ce6291e9eb4ab9f04d64d299c65800756d265db5 Mon Sep 17 00:00:00 2001 From: Jiayuan Zhang Date: Tue, 17 Feb 2026 00:49:57 +0800 Subject: [PATCH] fix(agent): enforce web_fetch after successful web_search --- packages/core/src/agent/runner.ts | 79 +++++++++- .../src/agent/system-prompt/sections.test.ts | 2 +- .../core/src/agent/system-prompt/sections.ts | 10 +- .../core/src/agent/web-tools-policy.test.ts | 145 ++++++++++++++++++ packages/core/src/agent/web-tools-policy.ts | 86 +++++++++++ 5 files changed, 314 insertions(+), 8 deletions(-) create mode 100644 packages/core/src/agent/web-tools-policy.test.ts create mode 100644 packages/core/src/agent/web-tools-policy.ts diff --git a/packages/core/src/agent/runner.ts b/packages/core/src/agent/runner.ts index c812b4a1..1f137f94 100644 --- a/packages/core/src/agent/runner.ts +++ b/packages/core/src/agent/runner.ts @@ -42,6 +42,11 @@ import { type SystemPromptMode, } from "./system-prompt/index.js"; import type { AuthProfileFailureReason } from "./auth-profiles/index.js"; +import { + shouldEnforceWebFetchAfterSearch, + summarizeWebToolUsage, + type ToolExecutionRecord, +} from "./web-tools-policy.js"; import { sanitizeToolCallInputs, sanitizeToolUseResultPairing, @@ -127,6 +132,16 @@ function formatRunLogToolSummary(tool: string, details: Record } } +const WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT = [ + "You used web_search but did not complete a successful web_fetch in this turn.", + "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.", + "Before finalizing your answer, you MUST:", + "1) Pick the 1-3 most relevant URLs from the web_search results.", + "2) Call web_fetch on those URLs.", + "3) Revise your answer based on fetched content.", + "If all fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.", +].join("\n"); + export class Agent { private readonly agent: PiAgentCore; private output; @@ -141,6 +156,7 @@ export class Agent { private readonly stderr: NodeJS.WritableStream; private readonly runLog: RunLog; private readonly toolStartTimes = new Map(); + private currentRunToolExecutions: ToolExecutionRecord[] = []; private initialized = false; // Context window settings (for pre-flight compaction) @@ -524,6 +540,7 @@ export class Agent { this.currentUserSource = options?.source; this._isRunning = true; this._aborted = false; + this.currentRunToolExecutions = []; const runStart = Date.now(); this.runLog.log("run_start", { @@ -552,6 +569,7 @@ export class Agent { // Loop to exhaust all candidate profiles on rotatable errors while (true) { + const toolExecutionStartIndex = this.currentRunToolExecutions.length; try { const llmStart = Date.now(); this.runLog.log("llm_call", { @@ -561,6 +579,7 @@ export class Agent { messages: this.agent.state.messages.length, }); await this.agent.prompt(prompt); + await this.enforceWebFetchAfterSearchIfNeeded(toolExecutionStartIndex); this.runLog.log("llm_result", { duration_ms: Date.now() - llmStart, }); @@ -692,6 +711,7 @@ export class Agent { this._lastEventSavedAssistant = undefined; this.currentUserDisplayPrompt = undefined; this.currentUserSource = undefined; + this.currentRunToolExecutions = []; this.runLog.flush().catch(() => {}); } } @@ -781,6 +801,56 @@ export class Agent { this.session.setApiKey(this.currentApiKey); } + private async enforceWebFetchAfterSearchIfNeeded( + toolExecutionStartIndex: number, + ): Promise { + if (this._internalRun) return; + + const activeTools = new Set( + (this.agent.state.tools ?? []).map((tool) => tool.name.toLowerCase()), + ); + const webSearchAvailable = activeTools.has("web_search"); + const webFetchAvailable = activeTools.has("web_fetch"); + + const currentTurnExecutions = this.currentRunToolExecutions.slice( + toolExecutionStartIndex, + ); + const usage = summarizeWebToolUsage(currentTurnExecutions); + + if ( + !shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable, + webFetchAvailable, + }) + ) { + return; + } + + this.runLog.log("web_search_fetch_guard", { + search_calls: usage.searchCalls, + search_success: usage.searchSuccess, + search_with_results: usage.searchSuccessWithResults, + fetch_calls: usage.fetchCalls, + fetch_success: usage.fetchSuccess, + }); + + try { + await this.agent.prompt(WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT); + this.runLog.log("web_search_fetch_guard_applied", { + search_with_results: usage.searchSuccessWithResults, + }); + } catch (error) { + const message = error instanceof Error ? error.message : String(error); + this.runLog.log("web_search_fetch_guard_failed", { + error: message.slice(0, 200), + }); + if (this.debug) { + this.stderr.write(`[web-guard] Failed to enforce search->fetch: ${message}\n`); + } + } + } + private handleRunLogEvent(event: AgentEvent) { if (event.type === "tool_execution_start") { const toolName = (event as any).toolName ?? "unknown"; @@ -800,11 +870,18 @@ export class Agent { const resultText = extractRunLogResultText(result); const resultChars = resultText?.length ?? 0; const details = extractRunLogResultDetails(result); + const isError = Boolean((event as any).isError ?? false); + + this.currentRunToolExecutions.push({ + toolName, + isError, + details, + }); const toolEndData: Record = { tool: toolName, duration_ms, - is_error: (event as any).isError ?? false, + is_error: isError, result_chars: resultChars, result_summary: formatRunLogToolSummary(toolName, details), }; diff --git a/packages/core/src/agent/system-prompt/sections.test.ts b/packages/core/src/agent/system-prompt/sections.test.ts index a1d2dd5a..75763e1e 100644 --- a/packages/core/src/agent/system-prompt/sections.test.ts +++ b/packages/core/src/agent/system-prompt/sections.test.ts @@ -181,7 +181,7 @@ describe("buildConditionalToolSections", () => { const result = buildConditionalToolSections(["web_search"], "full"); const text = result.join("\n"); expect(text).toContain("## Web Access"); - expect(text).toContain("Web usage is conditional, not mandatory"); + expect(text).toContain("you MUST call web_fetch"); }); it("adds dynamic evidence decision guidance when data tool is present", () => { diff --git a/packages/core/src/agent/system-prompt/sections.ts b/packages/core/src/agent/system-prompt/sections.ts index bf935fce..9fdb2591 100644 --- a/packages/core/src/agent/system-prompt/sections.ts +++ b/packages/core/src/agent/system-prompt/sections.ts @@ -364,14 +364,12 @@ export function buildConditionalToolSections( "## Web Access", "You have web access. Use it when the user asks about current events, needs up-to-date information, or requests content from URLs.", "Prefer web_search for discovery and web_fetch for specific URLs.", - "Web usage is conditional, not mandatory: call web tools when they materially improve evidence quality.", + "When web_search is used, treat snippets as incomplete previews rather than final evidence.", "", "### Search-then-Fetch", - "After web_search, evaluate whether the snippets contain enough detail to answer accurately.", - "If not, use web_fetch on the 1-3 most relevant URLs to get full content before answering.", - "Always fetch when the user asks for detailed explanations, comparisons, or analysis;", - "when snippets are vague or contradictory; or when the question requires specific data points.", - "Skip fetch when the answer is a simple fact clearly stated in the snippet or the user only wants a quick overview.", + "After every successful web_search, you MUST call web_fetch on 1-3 relevant URLs before detailed reasoning or factual claims.", + "Use fetched page content (not snippets) as the primary evidence for analysis and synthesis.", + "If all fetch attempts fail, explicitly report that limitation and avoid specific claims derived only from snippets.", "", ); } diff --git a/packages/core/src/agent/web-tools-policy.test.ts b/packages/core/src/agent/web-tools-policy.test.ts new file mode 100644 index 00000000..bb613161 --- /dev/null +++ b/packages/core/src/agent/web-tools-policy.test.ts @@ -0,0 +1,145 @@ +import { describe, expect, it } from "vitest"; +import { + shouldEnforceWebFetchAfterSearch, + summarizeWebToolUsage, + type ToolExecutionRecord, +} from "./web-tools-policy.js"; + +function buildRecord(params: { + toolName: string; + isError?: boolean; + details?: Record | null; +}): ToolExecutionRecord { + return { + toolName: params.toolName, + isError: params.isError ?? false, + details: params.details ?? null, + }; +} + +describe("web-tools-policy", () => { + describe("summarizeWebToolUsage", () => { + it("counts successful web_search calls with results", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 3, results: [{}, {}, {}] }, + }), + ]); + + expect(usage.searchCalls).toBe(1); + expect(usage.searchSuccess).toBe(1); + expect(usage.searchSuccessWithResults).toBe(1); + expect(usage.fetchCalls).toBe(0); + expect(usage.fetchSuccess).toBe(0); + }); + + it("does not count tool-level error payload as success", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { error: true, code: "search_failed" }, + }), + ]); + + expect(usage.searchCalls).toBe(1); + expect(usage.searchSuccess).toBe(0); + expect(usage.searchSuccessWithResults).toBe(0); + }); + }); + + describe("shouldEnforceWebFetchAfterSearch", () => { + it("enforces when search has results but fetch never succeeded", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 2, results: [{}, {}] }, + }), + ]); + + expect( + shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable: true, + webFetchAvailable: true, + }), + ).toBe(true); + }); + + it("does not enforce after a successful web_fetch", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 2, results: [{}, {}] }, + }), + buildRecord({ + toolName: "web_fetch", + details: { status: 200, length: 1024 }, + }), + ]); + + expect( + shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable: true, + webFetchAvailable: true, + }), + ).toBe(false); + }); + + it("does not enforce when search returns no results", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 0, results: [] }, + }), + ]); + + expect( + shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable: true, + webFetchAvailable: true, + }), + ).toBe(false); + }); + + it("does not enforce when web_fetch is unavailable", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 1, results: [{}] }, + }), + ]); + + expect( + shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable: true, + webFetchAvailable: false, + }), + ).toBe(false); + }); + + it("enforces when fetch was attempted but failed", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 1, results: [{}] }, + }), + buildRecord({ + toolName: "web_fetch", + details: { error: true, code: "fetch_failed" }, + }), + ]); + + expect( + shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable: true, + webFetchAvailable: true, + }), + ).toBe(true); + }); + }); +}); diff --git a/packages/core/src/agent/web-tools-policy.ts b/packages/core/src/agent/web-tools-policy.ts new file mode 100644 index 00000000..df6b999f --- /dev/null +++ b/packages/core/src/agent/web-tools-policy.ts @@ -0,0 +1,86 @@ +export type ToolExecutionRecord = { + toolName: string; + isError: boolean; + details: Record | null; +}; + +export type WebToolUsage = { + searchCalls: number; + searchSuccess: number; + searchSuccessWithResults: number; + fetchCalls: number; + fetchSuccess: number; +}; + +function hasToolError(details: Record | null): boolean { + return details?.error === true; +} + +function getSearchResultCount(details: Record | null): number { + if (!details) return 0; + const countRaw = details.count; + if (typeof countRaw === "number" && Number.isFinite(countRaw)) { + return Math.max(0, Math.floor(countRaw)); + } + + const results = details.results; + if (Array.isArray(results)) { + return results.length; + } + + return 0; +} + +function isSuccessfulExecution(record: ToolExecutionRecord): boolean { + if (record.isError) return false; + if (hasToolError(record.details)) return false; + return true; +} + +export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUsage { + const usage: WebToolUsage = { + searchCalls: 0, + searchSuccess: 0, + searchSuccessWithResults: 0, + fetchCalls: 0, + fetchSuccess: 0, + }; + + for (const record of records) { + const toolName = record.toolName.trim().toLowerCase(); + + if (toolName === "web_search") { + usage.searchCalls += 1; + if (isSuccessfulExecution(record)) { + usage.searchSuccess += 1; + if (getSearchResultCount(record.details) > 0) { + usage.searchSuccessWithResults += 1; + } + } + continue; + } + + if (toolName === "web_fetch") { + usage.fetchCalls += 1; + if (isSuccessfulExecution(record)) { + usage.fetchSuccess += 1; + } + } + } + + return usage; +} + +export function shouldEnforceWebFetchAfterSearch(params: { + usage: WebToolUsage; + webSearchAvailable: boolean; + webFetchAvailable: boolean; +}): boolean { + const { usage, webSearchAvailable, webFetchAvailable } = params; + + if (!webSearchAvailable || !webFetchAvailable) return false; + if (usage.searchSuccessWithResults <= 0) return false; + if (usage.fetchSuccess > 0) return false; + + return true; +}