diff --git a/packages/core/src/agent/runner.ts b/packages/core/src/agent/runner.ts index 04504866..4fb248fd 100644 --- a/packages/core/src/agent/runner.ts +++ b/packages/core/src/agent/runner.ts @@ -44,6 +44,7 @@ import { import type { AuthProfileFailureReason } from "./auth-profiles/index.js"; import { analyzeCrossTurnWebFetchNeed, + resolveWebFetchRequirementFromPrompt, shouldEnforceWebFetchAfterSearch, summarizeWebToolUsage, type ToolExecutionRecord, @@ -133,15 +134,45 @@ function formatRunLogToolSummary(tool: string, details: Record } } -const WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT = [ - "You used web_search but did not complete a successful web_fetch in this turn.", - "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.", - "Before finalizing your answer, you MUST:", - "1) Pick the 1-3 most relevant URLs from the web_search results.", - "2) Call web_fetch on those URLs.", - "3) Revise your answer based on fetched content.", - "If all fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.", -].join("\n"); +function buildWebSearchFetchEnforcementPrompt(params: { + requiredMinFetchSuccess: number; + fetchSuccess: number; + needsFollowupForLatestSearch: boolean; +}): { prompt: string; additionalFetchNeeded: number } { + const additionalFetchNeeded = Math.max( + 1, + params.requiredMinFetchSuccess - params.fetchSuccess, + params.needsFollowupForLatestSearch ? 1 : 0, + ); + + const lines = [ + "You used web_search, but web evidence coverage for this turn is still incomplete.", + "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.", + ]; + + if (params.requiredMinFetchSuccess > 1) { + lines.push( + `This task currently requires at least ${params.requiredMinFetchSuccess} successful web_fetch calls.`, + ); + } + + if (params.needsFollowupForLatestSearch) { + lines.push( + "You performed another successful web_search after your last successful web_fetch. " + + "You must fetch URLs from the latest search results before finalizing.", + ); + } + + lines.push( + "Before finalizing your answer, you MUST:", + "1) Pick the 1-3 most relevant URLs from the latest successful web_search results.", + `2) Complete at least ${additionalFetchNeeded} additional successful web_fetch call(s).`, + "3) Revise your answer based on fetched page content.", + "If all additional fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.", + ); + + return { prompt: lines.join("\n"), additionalFetchNeeded }; +} const CROSS_TURN_WEB_FETCH_ENFORCEMENT_PROMPT = [ "You are about to finalize a web-dependent answer, but no successful web_fetch happened in this turn.", @@ -590,7 +621,10 @@ export class Agent { messages: this.agent.state.messages.length, }); await this.agent.prompt(prompt); - await this.enforceWebFetchAfterSearchIfNeeded(toolExecutionStartIndex); + await this.enforceWebFetchAfterSearchIfNeeded({ + toolExecutionStartIndex, + userPrompt: prompt, + }); await this.enforceCrossTurnWebFetchIfNeeded({ toolExecutionStartIndex, userPrompt: prompt, @@ -816,9 +850,10 @@ export class Agent { this.session.setApiKey(this.currentApiKey); } - private async enforceWebFetchAfterSearchIfNeeded( - toolExecutionStartIndex: number, - ): Promise { + private async enforceWebFetchAfterSearchIfNeeded(params: { + toolExecutionStartIndex: number; + userPrompt: string; + }): Promise { if (this._internalRun) return; const activeTools = new Set( @@ -828,32 +863,48 @@ export class Agent { const webFetchAvailable = activeTools.has("web_fetch"); const currentTurnExecutions = this.currentRunToolExecutions.slice( - toolExecutionStartIndex, + params.toolExecutionStartIndex, ); const usage = summarizeWebToolUsage(currentTurnExecutions); + const requirement = resolveWebFetchRequirementFromPrompt(params.userPrompt); if ( !shouldEnforceWebFetchAfterSearch({ usage, webSearchAvailable, webFetchAvailable, + requiredMinFetchSuccess: requirement.requiredMinFetchSuccess, }) ) { return; } + const { prompt, additionalFetchNeeded } = buildWebSearchFetchEnforcementPrompt({ + requiredMinFetchSuccess: requirement.requiredMinFetchSuccess, + fetchSuccess: usage.fetchSuccess, + needsFollowupForLatestSearch: usage.searchNeedsFollowupFetch, + }); + this.runLog.log("web_search_fetch_guard", { search_calls: usage.searchCalls, search_success: usage.searchSuccess, search_with_results: usage.searchSuccessWithResults, + search_needs_followup_fetch: usage.searchNeedsFollowupFetch, fetch_calls: usage.fetchCalls, fetch_success: usage.fetchSuccess, + required_min_fetch_success: requirement.requiredMinFetchSuccess, + prompt_suggests_research_depth: requirement.promptSuggestsResearchDepth, + prompt_multi_source_cue: requirement.multiSourceCue, + prompt_explicit_min_fetch: requirement.explicitMinFetchFromPrompt, }); try { - await this.agent.prompt(WEB_SEARCH_FETCH_ENFORCEMENT_PROMPT); + await this.agent.prompt(prompt); this.runLog.log("web_search_fetch_guard_applied", { search_with_results: usage.searchSuccessWithResults, + search_needs_followup_fetch: usage.searchNeedsFollowupFetch, + required_min_fetch_success: requirement.requiredMinFetchSuccess, + additional_fetch_needed: additionalFetchNeeded, }); } catch (error) { const message = error instanceof Error ? error.message : String(error); diff --git a/packages/core/src/agent/web-tools-policy.test.ts b/packages/core/src/agent/web-tools-policy.test.ts index dd99d3ac..2246fe48 100644 --- a/packages/core/src/agent/web-tools-policy.test.ts +++ b/packages/core/src/agent/web-tools-policy.test.ts @@ -1,6 +1,7 @@ import { describe, expect, it } from "vitest"; import { analyzeCrossTurnWebFetchNeed, + resolveWebFetchRequirementFromPrompt, shouldEnforceWebFetchAfterSearch, summarizeWebToolUsage, type ToolExecutionRecord, @@ -31,6 +32,7 @@ describe("web-tools-policy", () => { expect(usage.searchCalls).toBe(1); expect(usage.searchSuccess).toBe(1); expect(usage.searchSuccessWithResults).toBe(1); + expect(usage.searchNeedsFollowupFetch).toBe(true); expect(usage.fetchCalls).toBe(0); expect(usage.fetchSuccess).toBe(0); }); @@ -46,6 +48,22 @@ describe("web-tools-policy", () => { expect(usage.searchCalls).toBe(1); expect(usage.searchSuccess).toBe(0); expect(usage.searchSuccessWithResults).toBe(0); + expect(usage.searchNeedsFollowupFetch).toBe(false); + }); + + it("marks latest search as covered when successful fetch follows", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 1, results: [{}] }, + }), + buildRecord({ + toolName: "web_fetch", + details: { status: 200, length: 1024 }, + }), + ]); + + expect(usage.searchNeedsFollowupFetch).toBe(false); }); }); @@ -88,6 +106,53 @@ describe("web-tools-policy", () => { ).toBe(false); }); + it("enforces when the latest successful search has no follow-up fetch", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 2, results: [{}, {}] }, + }), + buildRecord({ + toolName: "web_fetch", + details: { status: 200, length: 1200 }, + }), + buildRecord({ + toolName: "web_search", + details: { count: 3, results: [{}, {}, {}] }, + }), + ]); + + expect( + shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable: true, + webFetchAvailable: true, + }), + ).toBe(true); + }); + + it("enforces when prompt requires deeper evidence coverage", () => { + const usage = summarizeWebToolUsage([ + buildRecord({ + toolName: "web_search", + details: { count: 6, results: [{}, {}, {}] }, + }), + buildRecord({ + toolName: "web_fetch", + details: { status: 200, length: 2200 }, + }), + ]); + + expect( + shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable: true, + webFetchAvailable: true, + requiredMinFetchSuccess: 2, + }), + ).toBe(true); + }); + it("does not enforce when search returns no results", () => { const usage = summarizeWebToolUsage([ buildRecord({ @@ -230,4 +295,33 @@ describe("web-tools-policy", () => { expect(analysis.webCue).toBe(false); }); }); + + describe("resolveWebFetchRequirementFromPrompt", () => { + it("requires deeper fetch coverage for research-style prompts", () => { + const result = resolveWebFetchRequirementFromPrompt( + "帮我调研一下 APPLE 最近的产品信息,并做分析。", + ); + + expect(result.requiredMinFetchSuccess).toBe(2); + expect(result.promptSuggestsResearchDepth).toBe(true); + }); + + it("uses explicit minimum source count when present", () => { + const result = resolveWebFetchRequirementFromPrompt( + "Please use at least 3 sources and summarize the latest updates.", + ); + + expect(result.requiredMinFetchSuccess).toBe(3); + expect(result.explicitMinFetchFromPrompt).toBe(3); + }); + + it("falls back to 1 for simple prompts", () => { + const result = resolveWebFetchRequirementFromPrompt( + "What is OpenAI's CEO?", + ); + + expect(result.requiredMinFetchSuccess).toBe(1); + expect(result.promptSuggestsResearchDepth).toBe(false); + }); + }); }); diff --git a/packages/core/src/agent/web-tools-policy.ts b/packages/core/src/agent/web-tools-policy.ts index 21e3ad8c..9667af04 100644 --- a/packages/core/src/agent/web-tools-policy.ts +++ b/packages/core/src/agent/web-tools-policy.ts @@ -8,10 +8,19 @@ export type WebToolUsage = { searchCalls: number; searchSuccess: number; searchSuccessWithResults: number; + /** True when the latest successful search (with results) has no later successful fetch. */ + searchNeedsFollowupFetch: boolean; fetchCalls: number; fetchSuccess: number; }; +export type WebFetchRequirement = { + requiredMinFetchSuccess: number; + promptSuggestsResearchDepth: boolean; + multiSourceCue: boolean; + explicitMinFetchFromPrompt: number | null; +}; + export type CrossTurnWebFetchGuardAnalysis = { shouldEnforce: boolean; explicitFetchRequest: boolean; @@ -45,6 +54,17 @@ const USER_WEB_CONTEXT_PATTERNS: RegExp[] = [ /(?:\u7f51\u9875|\u7f51\u7ad9|\u7f51\u7edc|\u4e92\u8054\u7f51|\u94fe\u63a5|\u6765\u6e90|\u65b0\u95fb|\u62a5\u9053|\u6587\u7ae0)/, ]; +const USER_RESEARCH_DEPTH_PATTERNS: RegExp[] = [ + /\b(research|investigate|analysis|analyze|compare|comparison|deep[-\s]?dive|survey|report|review)\b/i, + /(?:\u8c03\u7814|\u7814\u7a76|\u5206\u6790|\u6df1\u5ea6|\u5bf9\u6bd4|\u5bf9\u7167|\u6c47\u603b|\u76d8\u70b9|\u62a5\u544a|\u8bc4\u4f30|\u8bc4\u6d4b)/, +]; + +const USER_MULTI_SOURCE_PATTERNS: RegExp[] = [ + /\b(multiple|multi-source|across sources|different sources)\b/i, + /(?:\u591a\u6765\u6e90|\u591a\u4e2a\u6765\u6e90|\u4e0d\u540c\u6765\u6e90|\u591a\u7f51\u7ad9)/, + /(?:\u81f3\u5c11|\u4e0d\u5c11\u4e8e|\u6700\u5c11)\s*\d+\s*(?:\u4e2a|\u6761)?(?:\u6765\u6e90|\u94fe\u63a5|\u7f51\u5740|\u7f51\u9875|\u6587\u7ae0)/, +]; + const USER_WEB_BLOCK_PATTERNS: RegExp[] = [ /\b(do not|don't|no|without)\s+(browse|web|internet|web_search|web_fetch|fetch)\b/i, /\bonly\b.*\b(snippet|snippets)\b/i, @@ -63,6 +83,28 @@ function hasAnyPattern(text: string, patterns: RegExp[]): boolean { return patterns.some((pattern) => pattern.test(text)); } +function normalizeMinFetchSuccess(raw: number): number { + if (!Number.isFinite(raw)) return 1; + return Math.max(1, Math.min(4, Math.floor(raw))); +} + +function extractExplicitMinFetchFromPrompt(prompt: string): number | null { + const patterns: RegExp[] = [ + /\b(?:at least|minimum of|no less than)\s*(\d+)\s*(?:sources?|links?|urls?|articles?|pages?)\b/i, + /(?:\u81f3\u5c11|\u4e0d\u5c11\u4e8e|\u6700\u5c11)\s*(\d+)\s*(?:\u4e2a|\u6761)?(?:\u6765\u6e90|\u94fe\u63a5|\u7f51\u5740|\u7f51\u9875|\u6587\u7ae0)/, + ]; + + for (const pattern of patterns) { + const match = prompt.match(pattern); + if (!match) continue; + const parsed = Number(match[1]); + if (!Number.isFinite(parsed)) continue; + return normalizeMinFetchSuccess(parsed); + } + + return null; +} + function hasToolError(details: Record | null): boolean { return details?.error === true; } @@ -93,9 +135,11 @@ export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUs searchCalls: 0, searchSuccess: 0, searchSuccessWithResults: 0, + searchNeedsFollowupFetch: false, fetchCalls: 0, fetchSuccess: 0, }; + let pendingSearchWithResults = false; for (const record of records) { const toolName = record.toolName.trim().toLowerCase(); @@ -106,6 +150,7 @@ export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUs usage.searchSuccess += 1; if (getSearchResultCount(record.details) > 0) { usage.searchSuccessWithResults += 1; + pendingSearchWithResults = true; } } continue; @@ -115,10 +160,12 @@ export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUs usage.fetchCalls += 1; if (isSuccessfulExecution(record)) { usage.fetchSuccess += 1; + pendingSearchWithResults = false; } } } + usage.searchNeedsFollowupFetch = pendingSearchWithResults; return usage; } @@ -126,14 +173,49 @@ export function shouldEnforceWebFetchAfterSearch(params: { usage: WebToolUsage; webSearchAvailable: boolean; webFetchAvailable: boolean; + requiredMinFetchSuccess?: number; }): boolean { - const { usage, webSearchAvailable, webFetchAvailable } = params; + const { + usage, + webSearchAvailable, + webFetchAvailable, + requiredMinFetchSuccess = 1, + } = params; if (!webSearchAvailable || !webFetchAvailable) return false; if (usage.searchSuccessWithResults <= 0) return false; - if (usage.fetchSuccess > 0) return false; + if (usage.fetchSuccess <= 0) return true; + if (usage.searchNeedsFollowupFetch) return true; + if (usage.fetchSuccess < normalizeMinFetchSuccess(requiredMinFetchSuccess)) return true; - return true; + return false; +} + +export function resolveWebFetchRequirementFromPrompt(prompt: string): WebFetchRequirement { + const normalizedPrompt = prompt ?? ""; + const promptSuggestsResearchDepth = hasAnyPattern( + normalizedPrompt, + USER_RESEARCH_DEPTH_PATTERNS, + ); + const multiSourceCue = hasAnyPattern(normalizedPrompt, USER_MULTI_SOURCE_PATTERNS); + const explicitMinFetchFromPrompt = extractExplicitMinFetchFromPrompt(normalizedPrompt); + + let requiredMinFetchSuccess = 1; + if (promptSuggestsResearchDepth) requiredMinFetchSuccess = 2; + if (multiSourceCue) requiredMinFetchSuccess = Math.max(requiredMinFetchSuccess, 2); + if (explicitMinFetchFromPrompt !== null) { + requiredMinFetchSuccess = Math.max( + requiredMinFetchSuccess, + explicitMinFetchFromPrompt, + ); + } + + return { + requiredMinFetchSuccess: normalizeMinFetchSuccess(requiredMinFetchSuccess), + promptSuggestsResearchDepth, + multiSourceCue, + explicitMinFetchFromPrompt, + }; } export function analyzeCrossTurnWebFetchNeed(params: {