/**
 * Builds the follow-up prompt sent to the model when a turn used web_search
 * but has not yet gathered enough fetched page content.
 *
 * @param params.requiredMinFetchSuccess Minimum number of successful web_fetch
 *   calls required for the turn.
 * @param params.fetchSuccess Number of web_fetch calls that already succeeded.
 * @param params.needsFollowupForLatestSearch Whether the latest successful
 *   search has no later successful fetch.
 * @returns The prompt text plus the number of additional successful fetches it
 *   asks for (always at least 1).
 */
function buildWebSearchFetchEnforcementPrompt(params: {
  requiredMinFetchSuccess: number;
  fetchSuccess: number;
  needsFollowupForLatestSearch: boolean;
}): { prompt: string; additionalFetchNeeded: number } {
  // The prompt is only built when enforcement fires, so it always asks for at
  // least one more fetch even if the numeric minimum is already satisfied.
  const additionalFetchNeeded = Math.max(
    1,
    params.requiredMinFetchSuccess - params.fetchSuccess,
  );

  const lines = [
    "You used web_search, but web evidence coverage for this turn is still incomplete.",
    "Search snippets are incomplete previews and are not sufficient evidence for detailed claims.",
  ];

  if (params.requiredMinFetchSuccess > 1) {
    lines.push(
      `This task currently requires at least ${params.requiredMinFetchSuccess} successful web_fetch calls.`,
    );
  }

  if (params.needsFollowupForLatestSearch) {
    // Only claim "after your last successful web_fetch" when a fetch actually
    // succeeded; otherwise that sentence would tell the model something false.
    const followupReason =
      params.fetchSuccess > 0
        ? "You performed another successful web_search after your last successful web_fetch. "
        : "You have not completed a successful web_fetch since your latest successful web_search. ";
    lines.push(
      followupReason +
        "You must fetch URLs from the latest search results before finalizing.",
    );
  }

  lines.push(
    "Before finalizing your answer, you MUST:",
    "1) Pick the 1-3 most relevant URLs from the latest successful web_search results.",
    `2) Complete at least ${additionalFetchNeeded} additional successful web_fetch call(s).`,
    "3) Revise your answer based on fetched page content.",
    "If all additional fetch attempts fail, explicitly say so and avoid relying on snippets for specific claims.",
  );

  return { prompt: lines.join("\n"), additionalFetchNeeded };
}

/**
 * Follow-up prompt used when a web-dependent answer is about to be finalized
 * without any web_fetch having happened in the current turn.
 */
const CROSS_TURN_WEB_FETCH_ENFORCEMENT_PROMPT = [
  "You are about to finalize a web-dependent answer, but no successful web_fetch happened in this turn.",
  "Do not rely only on snippets or prior-turn memory for fresh factual claims.",
  "Before finalizing your answer, you MUST:",
  "1) If relevant URLs are already available in this conversation, call web_fetch on 1-3 of them.",
  "2) If no URLs are available, call web_search to find candidates, then web_fetch on 1-3 relevant URLs.",
  "3) Revise your answer using fetched page content as primary evidence.",
  "If all fetch attempts fail, explicitly report that limitation and avoid specific claims not backed by fetched content.",
].join("\n");
errors while (true) { + const toolExecutionStartIndex = this.currentRunToolExecutions.length; try { const llmStart = Date.now(); this.runLog.log("llm_call", { @@ -562,6 +622,14 @@ export class Agent { messages: this.agent.state.messages.length, }); await this.agent.prompt(prompt); + await this.enforceWebFetchAfterSearchIfNeeded({ + toolExecutionStartIndex, + userPrompt: prompt, + }); + await this.enforceCrossTurnWebFetchIfNeeded({ + toolExecutionStartIndex, + userPrompt: prompt, + }); this.runLog.log("llm_result", { duration_ms: Date.now() - llmStart, }); @@ -693,6 +761,7 @@ export class Agent { this._lastEventSavedAssistant = undefined; this.currentUserDisplayPrompt = undefined; this.currentUserSource = undefined; + this.currentRunToolExecutions = []; this.runLog.flush().catch(() => {}); } } @@ -782,6 +851,125 @@ export class Agent { this.session.setApiKey(this.currentApiKey); } + private async enforceWebFetchAfterSearchIfNeeded(params: { + toolExecutionStartIndex: number; + userPrompt: string; + }): Promise { + if (this._internalRun) return; + + const activeTools = new Set( + (this.agent.state.tools ?? 
[]).map((tool) => tool.name.toLowerCase()), + ); + const webSearchAvailable = activeTools.has("web_search"); + const webFetchAvailable = activeTools.has("web_fetch"); + + const currentTurnExecutions = this.currentRunToolExecutions.slice( + params.toolExecutionStartIndex, + ); + const usage = summarizeWebToolUsage(currentTurnExecutions); + const requirement = resolveWebFetchRequirementFromPrompt(params.userPrompt); + + if ( + !shouldEnforceWebFetchAfterSearch({ + usage, + webSearchAvailable, + webFetchAvailable, + requiredMinFetchSuccess: requirement.requiredMinFetchSuccess, + }) + ) { + return; + } + + const { prompt, additionalFetchNeeded } = buildWebSearchFetchEnforcementPrompt({ + requiredMinFetchSuccess: requirement.requiredMinFetchSuccess, + fetchSuccess: usage.fetchSuccess, + needsFollowupForLatestSearch: usage.searchNeedsFollowupFetch, + }); + + this.runLog.log("web_search_fetch_guard", { + search_calls: usage.searchCalls, + search_success: usage.searchSuccess, + search_with_results: usage.searchSuccessWithResults, + search_needs_followup_fetch: usage.searchNeedsFollowupFetch, + fetch_calls: usage.fetchCalls, + fetch_success: usage.fetchSuccess, + required_min_fetch_success: requirement.requiredMinFetchSuccess, + prompt_suggests_research_depth: requirement.promptSuggestsResearchDepth, + prompt_multi_source_cue: requirement.multiSourceCue, + prompt_explicit_min_fetch: requirement.explicitMinFetchFromPrompt, + }); + + try { + await this.agent.prompt(prompt); + this.runLog.log("web_search_fetch_guard_applied", { + search_with_results: usage.searchSuccessWithResults, + search_needs_followup_fetch: usage.searchNeedsFollowupFetch, + required_min_fetch_success: requirement.requiredMinFetchSuccess, + additional_fetch_needed: additionalFetchNeeded, + }); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + this.runLog.log("web_search_fetch_guard_failed", { + error: message.slice(0, 200), + }); + if (this.debug) { + this.stderr.write(`[web-guard] Failed to enforce search->fetch: ${message}\n`); + } + } + } + + private async enforceCrossTurnWebFetchIfNeeded(params: { + toolExecutionStartIndex: number; + userPrompt: string; + }): Promise { + if (this._internalRun) return; + + const activeTools = new Set( + (this.agent.state.tools ?? []).map((tool) => tool.name.toLowerCase()), + ); + const webFetchAvailable = activeTools.has("web_fetch"); + const currentTurnExecutions = this.currentRunToolExecutions.slice( + params.toolExecutionStartIndex, + ); + const usage = summarizeWebToolUsage(currentTurnExecutions); + const analysis = analyzeCrossTurnWebFetchNeed({ + usage, + webFetchAvailable, + userPrompt: params.userPrompt, + assistantText: this.output.state.lastAssistantText ?? "", + }); + + if (!analysis.shouldEnforce) return; + + this.runLog.log("web_cross_turn_fetch_guard", { + fetch_calls: usage.fetchCalls, + fetch_success: usage.fetchSuccess, + explicit_fetch_request: analysis.explicitFetchRequest, + user_provides_url: analysis.userProvidesUrl, + freshness_cue: analysis.freshnessCue, + web_cue: analysis.webCue, + user_needs_fresh_web_evidence: analysis.userNeedsFreshWebEvidence, + user_blocks_web_fetch: analysis.userBlocksWebFetch, + assistant_web_claim_signal: analysis.assistantHasWebClaimSignal, + }); + + try { + await this.agent.prompt(CROSS_TURN_WEB_FETCH_ENFORCEMENT_PROMPT); + this.runLog.log("web_cross_turn_fetch_guard_applied", { + explicit_fetch_request: analysis.explicitFetchRequest, + user_provides_url: analysis.userProvidesUrl, + }); + } catch (error) { + const message = error instanceof Error ? 
error.message : String(error); + this.runLog.log("web_cross_turn_fetch_guard_failed", { + error: message.slice(0, 200), + }); + if (this.debug) { + this.stderr.write(`[web-cross-turn-guard] Failed to enforce fetch: ${message}\n`); + } + } + } + private handleRunLogEvent(event: AgentEvent) { if (event.type === "tool_execution_start") { const toolName = (event as any).toolName ?? "unknown"; @@ -801,11 +989,18 @@ export class Agent { const resultText = extractRunLogResultText(result); const resultChars = resultText?.length ?? 0; const details = extractRunLogResultDetails(result); + const isError = Boolean((event as any).isError ?? false); + + this.currentRunToolExecutions.push({ + toolName, + isError, + details, + }); const toolEndData: Record = { tool: toolName, duration_ms, - is_error: (event as any).isError ?? false, + is_error: isError, result_chars: resultChars, result_summary: formatRunLogToolSummary(toolName, details), }; diff --git a/packages/core/src/agent/system-prompt/sections.test.ts b/packages/core/src/agent/system-prompt/sections.test.ts index a1d2dd5a..75763e1e 100644 --- a/packages/core/src/agent/system-prompt/sections.test.ts +++ b/packages/core/src/agent/system-prompt/sections.test.ts @@ -181,7 +181,7 @@ describe("buildConditionalToolSections", () => { const result = buildConditionalToolSections(["web_search"], "full"); const text = result.join("\n"); expect(text).toContain("## Web Access"); - expect(text).toContain("Web usage is conditional, not mandatory"); + expect(text).toContain("you MUST call web_fetch"); }); it("adds dynamic evidence decision guidance when data tool is present", () => { diff --git a/packages/core/src/agent/system-prompt/sections.ts b/packages/core/src/agent/system-prompt/sections.ts index bf935fce..9fdb2591 100644 --- a/packages/core/src/agent/system-prompt/sections.ts +++ b/packages/core/src/agent/system-prompt/sections.ts @@ -364,14 +364,12 @@ export function buildConditionalToolSections( "## Web Access", "You have web 
import { describe, expect, it } from "vitest";
import {
  analyzeCrossTurnWebFetchNeed,
  resolveWebFetchRequirementFromPrompt,
  shouldEnforceWebFetchAfterSearch,
  summarizeWebToolUsage,
  type ToolExecutionRecord,
} from "./web-tools-policy.js";

/** Builds a tool execution record; defaults to a non-error call without details. */
function buildRecord({
  toolName,
  isError = false,
  details = null,
}: {
  toolName: string;
  isError?: boolean;
  details?: Record<string, unknown> | null;
}): ToolExecutionRecord {
  return { toolName, isError, details };
}

/** A web_search record reporting `count` hits and listing `listed` result entries. */
const searchHit = (count: number, listed: number = count): ToolExecutionRecord =>
  buildRecord({
    toolName: "web_search",
    details: { count, results: Array.from({ length: listed }, () => ({})) },
  });

/** A web_fetch record with HTTP 200 and the given body length. */
const fetchOk = (length: number): ToolExecutionRecord =>
  buildRecord({ toolName: "web_fetch", details: { status: 200, length } });

/** A record whose payload carries a tool-level error flag. */
const toolFailure = (toolName: string, code: string): ToolExecutionRecord =>
  buildRecord({ toolName, details: { error: true, code } });

/** Runs the search->fetch decision with both tools available unless overridden. */
const enforcementFor = (
  records: ToolExecutionRecord[],
  overrides: { webFetchAvailable?: boolean; requiredMinFetchSuccess?: number } = {},
): boolean =>
  shouldEnforceWebFetchAfterSearch({
    usage: summarizeWebToolUsage(records),
    webSearchAvailable: true,
    webFetchAvailable: true,
    ...overrides,
  });

/** Runs the cross-turn analysis with web_fetch available. */
const crossTurn = (
  userPrompt: string,
  assistantText: string,
  records: ToolExecutionRecord[] = [],
) =>
  analyzeCrossTurnWebFetchNeed({
    usage: summarizeWebToolUsage(records),
    webFetchAvailable: true,
    userPrompt,
    assistantText,
  });

describe("web-tools-policy", () => {
  describe("summarizeWebToolUsage", () => {
    it("counts successful web_search calls with results", () => {
      const summary = summarizeWebToolUsage([searchHit(3)]);

      expect(summary.searchCalls).toBe(1);
      expect(summary.searchSuccess).toBe(1);
      expect(summary.searchSuccessWithResults).toBe(1);
      expect(summary.searchNeedsFollowupFetch).toBe(true);
      expect(summary.fetchCalls).toBe(0);
      expect(summary.fetchSuccess).toBe(0);
    });

    it("does not count tool-level error payload as success", () => {
      const summary = summarizeWebToolUsage([toolFailure("web_search", "search_failed")]);

      expect(summary.searchCalls).toBe(1);
      expect(summary.searchSuccess).toBe(0);
      expect(summary.searchSuccessWithResults).toBe(0);
      expect(summary.searchNeedsFollowupFetch).toBe(false);
    });

    it("marks latest search as covered when successful fetch follows", () => {
      const summary = summarizeWebToolUsage([searchHit(1), fetchOk(1024)]);

      expect(summary.searchNeedsFollowupFetch).toBe(false);
    });
  });

  describe("shouldEnforceWebFetchAfterSearch", () => {
    it("enforces when search has results but fetch never succeeded", () => {
      expect(enforcementFor([searchHit(2)])).toBe(true);
    });

    it("does not enforce after a successful web_fetch", () => {
      expect(enforcementFor([searchHit(2), fetchOk(1024)])).toBe(false);
    });

    it("enforces when the latest successful search has no follow-up fetch", () => {
      expect(enforcementFor([searchHit(2), fetchOk(1200), searchHit(3)])).toBe(true);
    });

    it("enforces when prompt requires deeper evidence coverage", () => {
      const records = [searchHit(6, 3), fetchOk(2200)];

      expect(enforcementFor(records, { requiredMinFetchSuccess: 2 })).toBe(true);
    });

    it("does not enforce when search returns no results", () => {
      expect(enforcementFor([searchHit(0)])).toBe(false);
    });

    it("does not enforce when web_fetch is unavailable", () => {
      expect(enforcementFor([searchHit(1)], { webFetchAvailable: false })).toBe(false);
    });

    it("enforces when fetch was attempted but failed", () => {
      const records = [searchHit(1), toolFailure("web_fetch", "fetch_failed")];

      expect(enforcementFor(records)).toBe(true);
    });
  });

  describe("analyzeCrossTurnWebFetchNeed", () => {
    it("enforces when user explicitly asks to refetch page content", () => {
      const verdict = crossTurn(
        "Please refetch the page body this turn and verify with sources.",
        "Here is a quick summary.",
      );

      expect(verdict.shouldEnforce).toBe(true);
      expect(verdict.explicitFetchRequest).toBe(true);
    });

    it("enforces for freshness requests when assistant makes web-style claims", () => {
      const verdict = crossTurn(
        "Give me the latest web news about OpenAI with sources.",
        "According to Reuters, OpenAI announced a new release.",
      );

      expect(verdict.shouldEnforce).toBe(true);
      expect(verdict.freshnessCue).toBe(true);
      expect(verdict.webCue).toBe(true);
      expect(verdict.assistantHasWebClaimSignal).toBe(true);
    });

    it("does not enforce when a fetch was already attempted in this turn", () => {
      const verdict = crossTurn(
        "Please verify with the latest web sources.",
        "According to Reuters, ...",
        [toolFailure("web_fetch", "fetch_failed")],
      );

      expect(verdict.shouldEnforce).toBe(false);
    });

    it("does not enforce when user explicitly blocks web fetch", () => {
      const verdict = crossTurn(
        "Do not browse the web, only use snippets.",
        "According to Reuters, ...",
      );

      expect(verdict.shouldEnforce).toBe(false);
      expect(verdict.userBlocksWebFetch).toBe(true);
    });

    it("enforces when user provides a direct URL but no fetch happened", () => {
      const verdict = crossTurn(
        "Summarize https://example.com/article and include key takeaways.",
        "I can summarize it for you.",
      );

      expect(verdict.shouldEnforce).toBe(true);
      expect(verdict.userProvidesUrl).toBe(true);
    });

    it("does not enforce for non-web freshness requests", () => {
      const verdict = crossTurn(
        "What is the latest version in this repository?",
        "The latest version is 1.2.3.",
      );

      expect(verdict.shouldEnforce).toBe(false);
      expect(verdict.freshnessCue).toBe(true);
      expect(verdict.webCue).toBe(false);
    });
  });

  describe("resolveWebFetchRequirementFromPrompt", () => {
    it("requires deeper fetch coverage for research-style prompts", () => {
      const requirement = resolveWebFetchRequirementFromPrompt(
        "帮我调研一下 APPLE 最近的产品信息,并做分析。",
      );

      expect(requirement.requiredMinFetchSuccess).toBe(2);
      expect(requirement.promptSuggestsResearchDepth).toBe(true);
    });

    it("uses explicit minimum source count when present", () => {
      const requirement = resolveWebFetchRequirementFromPrompt(
        "Please use at least 3 sources and summarize the latest updates.",
      );

      expect(requirement.requiredMinFetchSuccess).toBe(3);
      expect(requirement.explicitMinFetchFromPrompt).toBe(3);
    });

    it("falls back to 1 for simple prompts", () => {
      const requirement = resolveWebFetchRequirementFromPrompt("What is OpenAI's CEO?");

      expect(requirement.requiredMinFetchSuccess).toBe(1);
      expect(requirement.promptSuggestsResearchDepth).toBe(false);
    });
  });
});
/** One finished tool call as recorded by the agent runner. */
export type ToolExecutionRecord = {
  toolName: string;
  /** Error flag reported alongside the tool result by the runtime. */
  isError: boolean;
  /** Structured result details, when the tool provided any. */
  details: Record<string, unknown> | null;
};

/** Aggregate counts of web_search / web_fetch activity for a slice of tool records. */
export type WebToolUsage = {
  searchCalls: number;
  searchSuccess: number;
  searchSuccessWithResults: number;
  /** True when the latest successful search (with results) has no later successful fetch. */
  searchNeedsFollowupFetch: boolean;
  fetchCalls: number;
  fetchSuccess: number;
};

/** Minimum fetch coverage derived from the wording of the user prompt. */
export type WebFetchRequirement = {
  requiredMinFetchSuccess: number;
  promptSuggestsResearchDepth: boolean;
  multiSourceCue: boolean;
  explicitMinFetchFromPrompt: number | null;
};

/** Decision plus the individual signals behind the cross-turn fetch guard. */
export type CrossTurnWebFetchGuardAnalysis = {
  shouldEnforce: boolean;
  explicitFetchRequest: boolean;
  userProvidesUrl: boolean;
  freshnessCue: boolean;
  webCue: boolean;
  userNeedsFreshWebEvidence: boolean;
  userBlocksWebFetch: boolean;
  assistantHasWebClaimSignal: boolean;
};

// None of the patterns below use the `g`/`y` flags, so `.test()` is stateless
// and the shared instances are safe to reuse across calls.
const URL_PATTERN = /https?:\/\/[^\s)]+/i;

const USER_EXPLICIT_FETCH_PATTERNS: RegExp[] = [
  /\b(re[-\s]?fetch|fetch (again|fresh)|verify with sources?|cite sources?|provide (sources?|links?))\b/i,
  /\b(revisit|revalidate|double-check)\b.*\b(source|link|url|web|website)\b/i,
  /(?:\u672c\u8f6e|\u8fd9\u4e00\u8f6e).*(?:\u91cd\u65b0|\u518d\u6b21).*(?:\u6293\u53d6|\u83b7\u53d6|\u62c9\u53d6)/,
  /(?:\u91cd\u65b0|\u518d\u6b21).*(?:\u6293\u53d6|\u83b7\u53d6).*(?:\u7f51\u9875|\u6b63\u6587|\u539f\u6587|\u94fe\u63a5)/,
  /(?:\u7ed9\u51fa|\u63d0\u4f9b).*(?:\u6765\u6e90|\u94fe\u63a5|\u5f15\u7528)/,
  /(?:\u6838\u5b9e|\u67e5\u8bc1|\u9a8c\u8bc1).*(?:\u6765\u6e90|\u7f51\u9875)/,
];

const USER_FRESHNESS_PATTERNS: RegExp[] = [
  /\b(latest|most recent|recent|today|current|up-to-date|newest|breaking)\b/i,
  /\b(news|update|updates)\b/i,
  /(?:\u6700\u65b0|\u6700\u8fd1|\u4eca\u5929|\u5f53\u524d|\u8fd1\u671f|\u52a8\u6001|\u65b0\u95fb|\u8d44\u8baf)/,
];

const USER_WEB_CONTEXT_PATTERNS: RegExp[] = [
  /\b(web|internet|online|url|urls|link|links|website|article|source|sources|news)\b/i,
  /(?:\u7f51\u9875|\u7f51\u7ad9|\u7f51\u7edc|\u4e92\u8054\u7f51|\u94fe\u63a5|\u6765\u6e90|\u65b0\u95fb|\u62a5\u9053|\u6587\u7ae0)/,
];

const USER_RESEARCH_DEPTH_PATTERNS: RegExp[] = [
  /\b(research|investigate|analysis|analyze|compare|comparison|deep[-\s]?dive|survey|report|review)\b/i,
  /(?:\u8c03\u7814|\u7814\u7a76|\u5206\u6790|\u6df1\u5ea6|\u5bf9\u6bd4|\u5bf9\u7167|\u6c47\u603b|\u76d8\u70b9|\u62a5\u544a|\u8bc4\u4f30|\u8bc4\u6d4b)/,
];

const USER_MULTI_SOURCE_PATTERNS: RegExp[] = [
  /\b(multiple|multi-source|across sources|different sources)\b/i,
  /(?:\u591a\u6765\u6e90|\u591a\u4e2a\u6765\u6e90|\u4e0d\u540c\u6765\u6e90|\u591a\u7f51\u7ad9)/,
  /(?:\u81f3\u5c11|\u4e0d\u5c11\u4e8e|\u6700\u5c11)\s*\d+\s*(?:\u4e2a|\u6761)?(?:\u6765\u6e90|\u94fe\u63a5|\u7f51\u5740|\u7f51\u9875|\u6587\u7ae0)/,
];

const USER_WEB_BLOCK_PATTERNS: RegExp[] = [
  /\b(do not|don't|no|without)\s+(browse|web|internet|web_search|web_fetch|fetch)\b/i,
  /\bonly\b.*\b(snippet|snippets)\b/i,
  /(?:\u4e0d\u8981|\u4e0d\u9700)\s*(?:\u8054\u7f51|\u6293\u53d6|\u641c\u7d22|\u83b7\u53d6\u7f51\u9875|web_fetch|web_search)/,
  /(?:\u4ec5|\u53ea).*(?:snippet|\u6458\u8981)/i,
];

const ASSISTANT_WEB_CLAIM_PATTERNS: RegExp[] = [
  /\b(according to|reported by|as reported|source|sources|citation|cited|press release)\b/i,
  /\b(reuters|bloomberg|associated press|ap news|financial times|wall street journal)\b/i,
  /(?:\u636e[^。\n]{0,24}(?:\u62a5\u9053|\u663e\u793a|\u79f0)|\u6765\u6e90|\u62a5\u9053\u79f0|\u516c\u544a|\u53d1\u5e03|\u5ba3\u5e03)/,
];

// Patterns whose first capture group is an explicit "at least N sources" count.
const EXPLICIT_MIN_FETCH_PATTERNS: RegExp[] = [
  /\b(?:at least|minimum of|no less than)\s*(\d+)\s*(?:sources?|links?|urls?|articles?|pages?)\b/i,
  /(?:\u81f3\u5c11|\u4e0d\u5c11\u4e8e|\u6700\u5c11)\s*(\d+)\s*(?:\u4e2a|\u6761)?(?:\u6765\u6e90|\u94fe\u63a5|\u7f51\u5740|\u7f51\u9875|\u6587\u7ae0)/,
];

/** Returns true when non-blank `text` matches at least one of `patterns`. */
function hasAnyPattern(text: string, patterns: RegExp[]): boolean {
  if (!text.trim()) return false;
  return patterns.some((pattern) => pattern.test(text));
}

/** Clamps a requested minimum fetch count to an integer in [1, 4]; non-finite input becomes 1. */
function normalizeMinFetchSuccess(raw: number): number {
  if (!Number.isFinite(raw)) return 1;
  return Math.max(1, Math.min(4, Math.floor(raw)));
}

/**
 * Extracts an explicit "at least N sources" style count from the prompt.
 *
 * @returns The count clamped by `normalizeMinFetchSuccess`, or null when the
 *   prompt contains no such phrase.
 */
function extractExplicitMinFetchFromPrompt(prompt: string): number | null {
  for (const pattern of EXPLICIT_MIN_FETCH_PATTERNS) {
    const match = prompt.match(pattern);
    if (!match) continue;
    const parsed = Number(match[1]);
    if (!Number.isFinite(parsed)) continue;
    return normalizeMinFetchSuccess(parsed);
  }

  return null;
}

/**
 * Detects a tool-level failure reported inside the result payload.
 *
 * Besides `error: true`, a non-empty error message string or any other truthy
 * error value (e.g. an error object) also counts, so a failed call is never
 * mistaken for usable evidence. `false`, `null`, `undefined`, `0` and blank
 * strings mean "no error".
 */
function hasToolError(details: Record<string, unknown> | null): boolean {
  const error = details?.error;
  if (typeof error === "string") return error.trim().length > 0;
  return Boolean(error);
}

/**
 * Reads the number of search results from the payload: a finite numeric
 * `count` wins (floored, never negative), otherwise the length of a `results`
 * array, otherwise 0.
 */
function getSearchResultCount(details: Record<string, unknown> | null): number {
  if (!details) return 0;
  const countRaw = details.count;
  if (typeof countRaw === "number" && Number.isFinite(countRaw)) {
    return Math.max(0, Math.floor(countRaw));
  }

  const results = details.results;
  if (Array.isArray(results)) {
    return results.length;
  }

  return 0;
}

/** A call succeeded when neither the runtime flag nor the payload reports an error. */
function isSuccessfulExecution(record: ToolExecutionRecord): boolean {
  return !record.isError && !hasToolError(record.details);
}

/**
 * Folds tool execution records, in order, into web_search / web_fetch counters.
 *
 * Tool names are compared after trimming and lower-casing; records for other
 * tools are ignored.
 *
 * @param records Tool executions in the order they finished.
 * @returns Usage counters, including whether the latest successful search with
 *   results still lacks a later successful fetch.
 */
export function summarizeWebToolUsage(records: ToolExecutionRecord[]): WebToolUsage {
  const usage: WebToolUsage = {
    searchCalls: 0,
    searchSuccess: 0,
    searchSuccessWithResults: 0,
    searchNeedsFollowupFetch: false,
    fetchCalls: 0,
    fetchSuccess: 0,
  };
  // Set by a successful search with results, cleared by any later successful fetch.
  let pendingSearchWithResults = false;

  for (const record of records) {
    const toolName = record.toolName.trim().toLowerCase();

    if (toolName === "web_search") {
      usage.searchCalls += 1;
      if (isSuccessfulExecution(record)) {
        usage.searchSuccess += 1;
        if (getSearchResultCount(record.details) > 0) {
          usage.searchSuccessWithResults += 1;
          pendingSearchWithResults = true;
        }
      }
      continue;
    }

    if (toolName === "web_fetch") {
      usage.fetchCalls += 1;
      if (isSuccessfulExecution(record)) {
        usage.fetchSuccess += 1;
        pendingSearchWithResults = false;
      }
    }
  }

  usage.searchNeedsFollowupFetch = pendingSearchWithResults;
  return usage;
}

/**
 * Decides whether the search->fetch guard should re-prompt the model.
 *
 * Enforcement requires both tools to be available and at least one successful
 * search with results. It then triggers when no fetch succeeded, when the
 * latest search has no follow-up fetch, or when fewer fetches succeeded than
 * the (clamped) required minimum.
 *
 * @param params.requiredMinFetchSuccess Required successful fetches; defaults to 1.
 */
export function shouldEnforceWebFetchAfterSearch(params: {
  usage: WebToolUsage;
  webSearchAvailable: boolean;
  webFetchAvailable: boolean;
  requiredMinFetchSuccess?: number;
}): boolean {
  const {
    usage,
    webSearchAvailable,
    webFetchAvailable,
    requiredMinFetchSuccess = 1,
  } = params;

  if (!webSearchAvailable || !webFetchAvailable) return false;
  if (usage.searchSuccessWithResults <= 0) return false;
  if (usage.fetchSuccess <= 0) return true;
  if (usage.searchNeedsFollowupFetch) return true;

  return usage.fetchSuccess < normalizeMinFetchSuccess(requiredMinFetchSuccess);
}

/**
 * Derives the minimum number of successful fetches from the prompt wording.
 *
 * Research-depth or multi-source cues raise the minimum to 2; an explicit
 * "at least N sources" count can raise it further. The result is clamped to
 * [1, 4].
 */
export function resolveWebFetchRequirementFromPrompt(prompt: string): WebFetchRequirement {
  // Tolerate a missing prompt from untyped callers.
  const normalizedPrompt = prompt ?? "";
  const promptSuggestsResearchDepth = hasAnyPattern(
    normalizedPrompt,
    USER_RESEARCH_DEPTH_PATTERNS,
  );
  const multiSourceCue = hasAnyPattern(normalizedPrompt, USER_MULTI_SOURCE_PATTERNS);
  const explicitMinFetchFromPrompt = extractExplicitMinFetchFromPrompt(normalizedPrompt);

  const requiredMinFetchSuccess = Math.max(
    promptSuggestsResearchDepth || multiSourceCue ? 2 : 1,
    explicitMinFetchFromPrompt ?? 1,
  );

  return {
    requiredMinFetchSuccess: normalizeMinFetchSuccess(requiredMinFetchSuccess),
    promptSuggestsResearchDepth,
    multiSourceCue,
    explicitMinFetchFromPrompt,
  };
}

/**
 * Analyzes whether a turn that made no web_fetch call should be re-prompted to
 * fetch before finalizing.
 *
 * Enforcement requires web_fetch to be available, no fetch attempt in the
 * given usage, no opt-out from the user, a user need for fresh web evidence
 * (explicit fetch request, a URL, or freshness + web cues), and either an
 * explicit request / URL from the user or web-style claims in the assistant
 * text.
 *
 * @returns The decision together with every signal that fed into it.
 */
export function analyzeCrossTurnWebFetchNeed(params: {
  usage: WebToolUsage;
  webFetchAvailable: boolean;
  userPrompt: string;
  assistantText: string;
}): CrossTurnWebFetchGuardAnalysis {
  // Tolerate missing text from untyped callers.
  const userPrompt = params.userPrompt ?? "";
  const assistantText = params.assistantText ?? "";

  const explicitFetchRequest = hasAnyPattern(
    userPrompt,
    USER_EXPLICIT_FETCH_PATTERNS,
  );
  const userProvidesUrl = URL_PATTERN.test(userPrompt);
  const freshnessCue = hasAnyPattern(userPrompt, USER_FRESHNESS_PATTERNS);
  const webCue = userProvidesUrl || hasAnyPattern(userPrompt, USER_WEB_CONTEXT_PATTERNS);
  const userNeedsFreshWebEvidence =
    explicitFetchRequest || userProvidesUrl || (freshnessCue && webCue);
  const userBlocksWebFetch = hasAnyPattern(userPrompt, USER_WEB_BLOCK_PATTERNS);
  const assistantHasWebClaimSignal =
    URL_PATTERN.test(assistantText) ||
    hasAnyPattern(assistantText, ASSISTANT_WEB_CLAIM_PATTERNS);

  // A failed fetch attempt also suppresses enforcement: the model already tried.
  const shouldEnforce =
    params.webFetchAvailable &&
    params.usage.fetchCalls === 0 &&
    params.usage.fetchSuccess === 0 &&
    !userBlocksWebFetch &&
    userNeedsFreshWebEvidence &&
    (explicitFetchRequest || userProvidesUrl || assistantHasWebClaimSignal);

  return {
    shouldEnforce,
    explicitFetchRequest,
    userProvidesUrl,
    freshnessCue,
    webCue,
    userNeedsFreshWebEvidence,
    userBlocksWebFetch,
    assistantHasWebClaimSignal,
  };
}